Project Full coverage report
Current view: Cores-VeeR-EL2—Cores-VeeR-EL2—design—exu—el2_exu_mul_ctl.sv Coverage Hit Total
Test Date: 21-11-2024 Toggle 76.0% 57 75
Test: all Branch 100.0% 60 60

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : // Copyright 2020 Western Digital Corporation or its affiliates.
       3              : //
       4              : // Licensed under the Apache License, Version 2.0 (the "License");
       5              : // you may not use this file except in compliance with the License.
       6              : // You may obtain a copy of the License at
       7              : //
       8              : // http://www.apache.org/licenses/LICENSE-2.0
       9              : //
      10              : // Unless required by applicable law or agreed to in writing, software
      11              : // distributed under the License is distributed on an "AS IS" BASIS,
      12              : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      13              : // See the License for the specific language governing permissions and
      14              : // limitations under the License.
      15              : 
      16              : 
      17              : module el2_exu_mul_ctl
      18              : import el2_pkg::*;
      19              : #(
      20              : `include "el2_param.vh"
      21              :  )
      22              :   (
      23     69840792 :    input logic          clk,              // Top level clock
      24          339 :    input logic          rst_l,            // Reset
      25              :    // Excluding scan_mode from coverage as its usage is determined by the integrator of the VeeR core.
      26              :    /*verilator coverage_off*/
      27              :    input logic          scan_mode,        // Scan mode
      28              :    /*verilator coverage_on*/
      29              : 
      30            0 :    input el2_mul_pkt_t mul_p,            // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result}
      31              : 
      32        20188 :    input logic [31:0]   rs1_in,           // A operand
      33        22747 :    input logic [31:0]   rs2_in,           // B operand
      34              : 
      35              : 
      36        17528 :    output logic [31:0]  result_x          // Result
      37              :   );
      38              : 
      39              : 
      40       251048 :    logic                mul_x_enable;
      41       251048 :    logic                bit_x_enable;
      42        16868 :    logic signed [32:0]  rs1_ext_in;
      43        12631 :    logic signed [32:0]  rs2_ext_in;
      44        34802 :    logic        [65:0]  prod_x;
      45        16705 :    logic                low_x;
      46              : 
      47              : 
      48              : 
      49              :    // *** Start - BitManip ***
      50              : 
      51         2770 :    logic                bitmanip_sel_d;
      52         1428 :    logic                bitmanip_sel_x;
      53          242 :    logic        [31:0]  bitmanip_d;
      54          224 :    logic        [31:0]  bitmanip_x;
      55              : 
      56              : 
      57              : 
      58              :    // ZBE
      59            0 :    logic                ap_bcompress;
      60            0 :    logic                ap_bdecompress;
      61              : 
      62              :    // ZBC
      63          852 :    logic                ap_clmul;
      64          888 :    logic                ap_clmulh;
      65         1030 :    logic                ap_clmulr;
      66              : 
      67              :    // ZBP
      68            0 :    logic                ap_grev;
      69            0 :    logic                ap_gorc;
      70            0 :    logic                ap_shfl;
      71            0 :    logic                ap_unshfl;
      72            0 :    logic                ap_xperm_n;
      73            0 :    logic                ap_xperm_b;
      74            0 :    logic                ap_xperm_h;
      75              : 
      76              :    // ZBR
      77            0 :    logic                ap_crc32_b;
      78            0 :    logic                ap_crc32_h;
      79            0 :    logic                ap_crc32_w;
      80            0 :    logic                ap_crc32c_b;
      81            0 :    logic                ap_crc32c_h;
      82            0 :    logic                ap_crc32c_w;
      83              : 
      84              :    // ZBF
      85            0 :    logic                ap_bfp;
      86              : 
      87              : 
      88              :    if (pt.BITMANIP_ZBE == 1)
      89              :      begin
      90              :        assign ap_bcompress    =  mul_p.bcompress;
      91              :        assign ap_bdecompress  =  mul_p.bdecompress;
      92              :      end
      93              :    else
      94              :      begin
      95              :        assign ap_bcompress    =  1'b0;
      96              :        assign ap_bdecompress  =  1'b0;
      97              :      end
      98              : 
      99              :    if (pt.BITMANIP_ZBC == 1)
     100              :      begin
     101              :        assign ap_clmul        =  mul_p.clmul;
     102              :        assign ap_clmulh       =  mul_p.clmulh;
     103              :        assign ap_clmulr       =  mul_p.clmulr;
     104              :      end
     105              :    else
     106              :      begin
     107              :        assign ap_clmul        =  1'b0;
     108              :        assign ap_clmulh       =  1'b0;
     109              :        assign ap_clmulr       =  1'b0;
     110              :      end
     111              : 
     112              :    if (pt.BITMANIP_ZBP == 1)
     113              :      begin
     114              :        assign ap_grev         =  mul_p.grev;
     115              :        assign ap_gorc         =  mul_p.gorc;
     116              :        assign ap_shfl         =  mul_p.shfl;
     117              :        assign ap_unshfl       =  mul_p.unshfl;
     118              :        assign ap_xperm_n      =  mul_p.xperm_n;
     119              :        assign ap_xperm_b      =  mul_p.xperm_b;
     120              :        assign ap_xperm_h      =  mul_p.xperm_h;
     121              :      end
     122              :    else
     123              :      begin
     124              :        assign ap_grev         =  1'b0;
     125              :        assign ap_gorc         =  1'b0;
     126              :        assign ap_shfl         =  1'b0;
     127              :        assign ap_unshfl       =  1'b0;
     128              :        assign ap_xperm_n      =  1'b0;
     129              :        assign ap_xperm_b      =  1'b0;
     130              :        assign ap_xperm_h      =  1'b0;
     131              :      end
     132              : 
     133              :    if (pt.BITMANIP_ZBR == 1)
     134              :      begin
     135              :        assign ap_crc32_b      =  mul_p.crc32_b;
     136              :        assign ap_crc32_h      =  mul_p.crc32_h;
     137              :        assign ap_crc32_w      =  mul_p.crc32_w;
     138              :        assign ap_crc32c_b     =  mul_p.crc32c_b;
     139              :        assign ap_crc32c_h     =  mul_p.crc32c_h;
     140              :        assign ap_crc32c_w     =  mul_p.crc32c_w;
     141              :      end
     142              :    else
     143              :      begin
     144              :        assign ap_crc32_b      =  1'b0;
     145              :        assign ap_crc32_h      =  1'b0;
     146              :        assign ap_crc32_w      =  1'b0;
     147              :        assign ap_crc32c_b     =  1'b0;
     148              :        assign ap_crc32c_h     =  1'b0;
     149              :        assign ap_crc32c_w     =  1'b0;
     150              :      end
     151              : 
     152              :    if (pt.BITMANIP_ZBF == 1)
     153              :      begin
     154              :        assign ap_bfp          =  mul_p.bfp;
     155              :      end
     156              :    else
     157              :      begin
     158              :        assign ap_bfp          =  1'b0;
     159              :      end
     160              : 
     161              : 
     162              :    // *** End   - BitManip ***
     163              : 
     164              : 
     165              : 
     166              :    assign mul_x_enable           =  mul_p.valid;
     167              :    assign bit_x_enable           =  mul_p.valid;
     168              : 
     169              :    assign rs1_ext_in[32]         =  mul_p.rs1_sign & rs1_in[31];
     170              :    assign rs2_ext_in[32]         =  mul_p.rs2_sign & rs2_in[31];
     171              : 
     172              :    assign rs1_ext_in[31:0]       =  rs1_in[31:0];
     173              :    assign rs2_ext_in[31:0]       =  rs2_in[31:0];
     174              : 
     175              : 
     176              : 
     177              :    // --------------------------- Multiply       ----------------------------------
     178              : 
     179              : 
     180        12410 :    logic signed [32:0]  rs1_x;
     181        11050 :    logic signed [32:0]  rs2_x;
     182              : 
     183              :    rvdffe #(34) i_a_x_ff         (.*, .clk(clk),  .din({mul_p.low,rs1_ext_in[32:0]}),        .dout({low_x,rs1_x[32:0]}),                 .en(mul_x_enable));
     184              :    rvdffe #(33) i_b_x_ff         (.*, .clk(clk),  .din(           rs2_ext_in[32:0] ),        .dout(       rs2_x[32:0] ),                 .en(mul_x_enable));
     185              : 
     186              : 
     187              :    assign prod_x[65:0]           =  rs1_x  *  rs2_x;
     188              : 
     189              : 
     190              : 
     191              : 
     192              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  BCOMPRESS, BDECOMPRESS * * * * * * * * * * * * *
     193              : 
     194              : 
     195              :    // *** BCOMPRESS == "gather"  ***
     196              : 
     197         1362 :    logic        [31:0]    bcompress_d;
     198        41861 :    logic                  bcompress_test_bit_d;
     199              :    integer                bcompress_i, bcompress_j;
     200              : 
     201              : 
     202          340 :    always_comb
     203          340 :      begin
     204              : 
     205          340 :        bcompress_j                             =      0;
     206          340 :        bcompress_test_bit_d                    =   1'b0;
     207          340 :        bcompress_d[31:0]                       =  32'b0;
     208              : 
     209          340 :        for (bcompress_i=0; bcompress_i<32; bcompress_i++)
     210        10880 :          begin
     211        10880 :              bcompress_test_bit_d              =  rs2_in[bcompress_i];
     212    938946400 :              if (bcompress_test_bit_d)
     213      1873760 :                begin
     214      1873760 :                   bcompress_d[bcompress_j]     =  rs1_in[bcompress_i];
     215      1873760 :                   bcompress_j                  =  bcompress_j + 1;
     216              :                end  // IF  bcompress_test_bit
     217              :          end        // FOR bcompress_i
     218              :      end            // ALWAYS_COMB
     219              : 
     220              : 
     221              : 
     222              :    // *** BDECOMPRESS == "scatter" ***
     223              : 
     224         7191 :    logic        [31:0]    bdecompress_d;
     225        41861 :    logic                  bdecompress_test_bit_d;
     226              :    integer                bdecompress_i, bdecompress_j;
     227              : 
     228              : 
     229          340 :    always_comb
     230          340 :      begin
     231              : 
     232          340 :        bdecompress_j                           =      0;
     233          340 :        bdecompress_test_bit_d                  =   1'b0;
     234          340 :        bdecompress_d[31:0]                     =  32'b0;
     235              : 
     236          340 :        for (bdecompress_i=0; bdecompress_i<32; bdecompress_i++)
     237        10880 :          begin
     238        10880 :              bdecompress_test_bit_d            =  rs2_in[bdecompress_i];
     239    938946400 :              if (bdecompress_test_bit_d)
     240      1873760 :                begin
     241      1873760 :                   bdecompress_d[bdecompress_i] =  rs1_in[bdecompress_j];
     242      1873760 :                   bdecompress_j                =  bdecompress_j + 1;
     243              :                end  // IF  bdecompress_test_bit
     244              :          end        // FOR bdecompress_i
     245              :      end            // ALWAYS_COMB
     246              : 
     247              : 
     248              : 
     249              : 
     250              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  CLMUL, CLMULH, CLMULR  * * * * * * * * * * * * *
     251              : 
     252        57611 :    logic        [62:0]    clmul_raw_d;
     253              : 
     254              : 
     255              :    assign clmul_raw_d[62:0]      = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0]      } ) ^
     256              :                                    ( {63{rs2_in[01]}} & {30'b0,rs1_in[31:0], 1'b0} ) ^
     257              :                                    ( {63{rs2_in[02]}} & {29'b0,rs1_in[31:0], 2'b0} ) ^
     258              :                                    ( {63{rs2_in[03]}} & {28'b0,rs1_in[31:0], 3'b0} ) ^
     259              :                                    ( {63{rs2_in[04]}} & {27'b0,rs1_in[31:0], 4'b0} ) ^
     260              :                                    ( {63{rs2_in[05]}} & {26'b0,rs1_in[31:0], 5'b0} ) ^
     261              :                                    ( {63{rs2_in[06]}} & {25'b0,rs1_in[31:0], 6'b0} ) ^
     262              :                                    ( {63{rs2_in[07]}} & {24'b0,rs1_in[31:0], 7'b0} ) ^
     263              :                                    ( {63{rs2_in[08]}} & {23'b0,rs1_in[31:0], 8'b0} ) ^
     264              :                                    ( {63{rs2_in[09]}} & {22'b0,rs1_in[31:0], 9'b0} ) ^
     265              :                                    ( {63{rs2_in[10]}} & {21'b0,rs1_in[31:0],10'b0} ) ^
     266              :                                    ( {63{rs2_in[11]}} & {20'b0,rs1_in[31:0],11'b0} ) ^
     267              :                                    ( {63{rs2_in[12]}} & {19'b0,rs1_in[31:0],12'b0} ) ^
     268              :                                    ( {63{rs2_in[13]}} & {18'b0,rs1_in[31:0],13'b0} ) ^
     269              :                                    ( {63{rs2_in[14]}} & {17'b0,rs1_in[31:0],14'b0} ) ^
     270              :                                    ( {63{rs2_in[15]}} & {16'b0,rs1_in[31:0],15'b0} ) ^
     271              :                                    ( {63{rs2_in[16]}} & {15'b0,rs1_in[31:0],16'b0} ) ^
     272              :                                    ( {63{rs2_in[17]}} & {14'b0,rs1_in[31:0],17'b0} ) ^
     273              :                                    ( {63{rs2_in[18]}} & {13'b0,rs1_in[31:0],18'b0} ) ^
     274              :                                    ( {63{rs2_in[19]}} & {12'b0,rs1_in[31:0],19'b0} ) ^
     275              :                                    ( {63{rs2_in[20]}} & {11'b0,rs1_in[31:0],20'b0} ) ^
     276              :                                    ( {63{rs2_in[21]}} & {10'b0,rs1_in[31:0],21'b0} ) ^
     277              :                                    ( {63{rs2_in[22]}} & { 9'b0,rs1_in[31:0],22'b0} ) ^
     278              :                                    ( {63{rs2_in[23]}} & { 8'b0,rs1_in[31:0],23'b0} ) ^
     279              :                                    ( {63{rs2_in[24]}} & { 7'b0,rs1_in[31:0],24'b0} ) ^
     280              :                                    ( {63{rs2_in[25]}} & { 6'b0,rs1_in[31:0],25'b0} ) ^
     281              :                                    ( {63{rs2_in[26]}} & { 5'b0,rs1_in[31:0],26'b0} ) ^
     282              :                                    ( {63{rs2_in[27]}} & { 4'b0,rs1_in[31:0],27'b0} ) ^
     283              :                                    ( {63{rs2_in[28]}} & { 3'b0,rs1_in[31:0],28'b0} ) ^
     284              :                                    ( {63{rs2_in[29]}} & { 2'b0,rs1_in[31:0],29'b0} ) ^
     285              :                                    ( {63{rs2_in[30]}} & { 1'b0,rs1_in[31:0],30'b0} ) ^
     286              :                                    ( {63{rs2_in[31]}} & {      rs1_in[31:0],31'b0} );
     287              : 
     288              : 
     289              : 
     290              : 
     291              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  GREV         * * * * * * * * * * * * * * * * * *
     292              : 
     293              :    // uint32_t grev32(uint32_t rs1, uint32_t rs2)
     294              :    // {
     295              :    //     uint32_t x = rs1;
     296              :    //     int shamt = rs2 & 31;
     297              :    //
     298              :    //     if (shamt &  1)  x = ( (x & 0x55555555) <<  1) | ( (x & 0xAAAAAAAA) >>  1);
     299              :    //     if (shamt &  2)  x = ( (x & 0x33333333) <<  2) | ( (x & 0xCCCCCCCC) >>  2);
     300              :    //     if (shamt &  4)  x = ( (x & 0x0F0F0F0F) <<  4) | ( (x & 0xF0F0F0F0) >>  4);
     301              :    //     if (shamt &  8)  x = ( (x & 0x00FF00FF) <<  8) | ( (x & 0xFF00FF00) >>  8);
     302              :    //     if (shamt & 16)  x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
     303              :    //
     304              :    //     return x;
     305              :    //  }
     306              : 
     307              : 
     308        29061 :    logic        [31:0]    grev1_d;
     309        26044 :    logic        [31:0]    grev2_d;
     310        24671 :    logic        [31:0]    grev4_d;
     311        46661 :    logic        [31:0]    grev8_d;
     312        35104 :    logic        [31:0]    grev_d;
     313              : 
     314              : 
     315              :    assign grev1_d[31:0]       = (rs2_in[0])  ?  {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
     316              :                                                  rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
     317              :                                                  rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
     318              :                                                  rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]}  :  rs1_in[31:0];
     319              : 
     320              :    assign grev2_d[31:0]       = (rs2_in[1])  ?  {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26],
     321              :                                                  grev1_d[21:20],grev1_d[23:22],grev1_d[17:16],grev1_d[19:18],
     322              :                                                  grev1_d[13:12],grev1_d[15:14],grev1_d[09:08],grev1_d[11:10],
     323              :                                                  grev1_d[05:04],grev1_d[07:06],grev1_d[01:00],grev1_d[03:02]}  :  grev1_d[31:0];
     324              : 
     325              :    assign grev4_d[31:0]       = (rs2_in[2])  ?  {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20],
     326              :                                                  grev2_d[11:08],grev2_d[15:12],grev2_d[03:00],grev2_d[07:04]}  :  grev2_d[31:0];
     327              : 
     328              :    assign grev8_d[31:0]       = (rs2_in[3])  ?  {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]}  :  grev4_d[31:0];
     329              : 
     330              :    assign grev_d[31:0]        = (rs2_in[4])  ?  {grev8_d[15:00],grev8_d[31:16]}  :  grev8_d[31:0];
     331              : 
     332              : 
     333              : 
     334              : 
     335              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  GORC         * * * * * * * * * * * * * * * * * *
     336              : 
     337              :    // uint32_t gorc32(uint32_t rs1, uint32_t rs2)
     338              :    // {
     339              :    //     uint32_t x = rs1;
     340              :    //     int shamt = rs2 & 31;
     341              :    //
     342              :    //     if (shamt &  1)  x |= ( (x & 0x55555555) <<  1) | ( (x & 0xAAAAAAAA) >>  1);
     343              :    //     if (shamt &  2)  x |= ( (x & 0x33333333) <<  2) | ( (x & 0xCCCCCCCC) >>  2);
     344              :    //     if (shamt &  4)  x |= ( (x & 0x0F0F0F0F) <<  4) | ( (x & 0xF0F0F0F0) >>  4);
     345              :    //     if (shamt &  8)  x |= ( (x & 0x00FF00FF) <<  8) | ( (x & 0xFF00FF00) >>  8);
     346              :    //     if (shamt & 16)  x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
     347              :    //
     348              :    //     return x;
     349              :    //  }
     350              : 
     351              : 
     352        31102 :    logic        [31:0]    gorc1_d;
     353        32894 :    logic        [31:0]    gorc2_d;
     354        35793 :    logic        [31:0]    gorc4_d;
     355        80845 :    logic        [31:0]    gorc8_d;
     356        52895 :    logic        [31:0]    gorc_d;
     357              : 
     358              : 
     359              :    assign gorc1_d[31:0]       = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
     360              :                                                      rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
     361              :                                                      rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
     362              :                                                      rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} ) | rs1_in[31:0];
     363              : 
     364              :    assign gorc2_d[31:0]       = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26],
     365              :                                                      gorc1_d[21:20],gorc1_d[23:22],gorc1_d[17:16],gorc1_d[19:18],
     366              :                                                      gorc1_d[13:12],gorc1_d[15:14],gorc1_d[09:08],gorc1_d[11:10],
     367              :                                                      gorc1_d[05:04],gorc1_d[07:06],gorc1_d[01:00],gorc1_d[03:02]} ) | gorc1_d[31:0];
     368              : 
     369              :    assign gorc4_d[31:0]       = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20],
     370              :                                                      gorc2_d[11:08],gorc2_d[15:12],gorc2_d[03:00],gorc2_d[07:04]} ) | gorc2_d[31:0];
     371              : 
     372              :    assign gorc8_d[31:0]       = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0];
     373              : 
     374              :    assign gorc_d[31:0]        = ( {32{rs2_in[4]}} & {gorc8_d[15:00],gorc8_d[31:16]} ) | gorc8_d[31:0];
     375              : 
     376              : 
     377              : 
     378              : 
     379              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  SHFL, UNSHFL * * * * * * * * * * * * * * * * * *
     380              : 
     381              :    // uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N)
     382              :    // {
     383              :    //     uint32_t x  = src & ~(maskL | maskR);
     384              :    //     x          |= ((src << N) & maskL) | ((src >> N) & maskR);
     385              :    //     return x;
     386              :    // }
     387              :    //
     388              :    //
     389              :    //
     390              :    // uint32_t shfl32(uint32_t rs1, uint32_t rs2)
     391              :    // {
     392              :    //     uint32_t x = rs1;
     393              :    //     int shamt = rs2 & 15;
     394              :    //
     395              :    //     if (shamt & 8)  x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
     396              :    //     if (shamt & 4)  x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
     397              :    //     if (shamt & 2)  x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
     398              :    //     if (shamt & 1)  x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
     399              :    //
     400              :    //     return x;
     401              :    // }
     402              : 
     403              : 
     404        20634 :    logic        [31:0]    shfl8_d;
     405        37767 :    logic        [31:0]    shfl4_d;
     406        37878 :    logic        [31:0]    shfl2_d;
     407        35804 :    logic        [31:0]    shfl_d;
     408              : 
     409              : 
     410              : 
     411              :    assign shfl8_d[31:0]       = (rs2_in[3])  ?  {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]}      :  rs1_in[31:0];
     412              : 
     413              :    assign shfl4_d[31:0]       = (rs2_in[2])  ?  {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16],
     414              :                                                  shfl8_d[15:12],shfl8_d[07:04],shfl8_d[11:08],shfl8_d[03:00]}  :  shfl8_d[31:0];
     415              : 
     416              :    assign shfl2_d[31:0]       = (rs2_in[1])  ?  {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24],
     417              :                                                  shfl4_d[23:22],shfl4_d[19:18],shfl4_d[21:20],shfl4_d[17:16],
     418              :                                                  shfl4_d[15:14],shfl4_d[11:10],shfl4_d[13:12],shfl4_d[09:08],
     419              :                                                  shfl4_d[07:06],shfl4_d[03:02],shfl4_d[05:04],shfl4_d[01:00]}  :  shfl4_d[31:0];
     420              : 
     421              :    assign shfl_d[31:0]        = (rs2_in[0])  ?  {shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24],
     422              :                                                  shfl2_d[23],shfl2_d[21],shfl2_d[22],shfl2_d[20],shfl2_d[19],shfl2_d[17],shfl2_d[18],shfl2_d[16],
     423              :                                                  shfl2_d[15],shfl2_d[13],shfl2_d[14],shfl2_d[12],shfl2_d[11],shfl2_d[09],shfl2_d[10],shfl2_d[08],
     424              :                                                  shfl2_d[07],shfl2_d[05],shfl2_d[06],shfl2_d[04],shfl2_d[03],shfl2_d[01],shfl2_d[02],shfl2_d[00]}  :  shfl2_d[31:0];
     425              : 
     426              : 
     427              : 
     428              : 
     429              :    // uint32_t unshfl32(uint32_t rs1, uint32_t rs2)
     430              :    // {
     431              :    //     uint32_t x = rs1;
     432              :    //     int shamt = rs2 & 15;
     433              :    //
     434              :    //     if (shamt & 1)  x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
     435              :    //     if (shamt & 2)  x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
     436              :    //     if (shamt & 4)  x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
     437              :    //     if (shamt & 8)  x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
     438              :    //
     439              :    //     return x;
     440              :    // }
     441              : 
     442              : 
     443        20262 :    logic        [31:0]    unshfl1_d;
     444        20450 :    logic        [31:0]    unshfl2_d;
     445        39731 :    logic        [31:0]    unshfl4_d;
     446        32020 :    logic        [31:0]    unshfl_d;
     447              : 
     448              : 
     449              :    assign unshfl1_d[31:0]     = (rs2_in[0])  ?  {rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24],
     450              :                                                  rs1_in[23],rs1_in[21],rs1_in[22],rs1_in[20],rs1_in[19],rs1_in[17],rs1_in[18],rs1_in[16],
     451              :                                                  rs1_in[15],rs1_in[13],rs1_in[14],rs1_in[12],rs1_in[11],rs1_in[09],rs1_in[10],rs1_in[08],
     452              :                                                  rs1_in[07],rs1_in[05],rs1_in[06],rs1_in[04],rs1_in[03],rs1_in[01],rs1_in[02],rs1_in[00]}  :  rs1_in[31:0];
     453              : 
     454              :    assign unshfl2_d[31:0]     = (rs2_in[1])  ?  {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24],
     455              :                                                  unshfl1_d[23:22],unshfl1_d[19:18],unshfl1_d[21:20],unshfl1_d[17:16],
     456              :                                                  unshfl1_d[15:14],unshfl1_d[11:10],unshfl1_d[13:12],unshfl1_d[09:08],
     457              :                                                  unshfl1_d[07:06],unshfl1_d[03:02],unshfl1_d[05:04],unshfl1_d[01:00]}  :  unshfl1_d[31:0];
     458              : 
     459              :    assign unshfl4_d[31:0]     = (rs2_in[2])  ?  {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16],
     460              :                                                  unshfl2_d[15:12],unshfl2_d[07:04],unshfl2_d[11:08],unshfl2_d[03:00]}  :  unshfl2_d[31:0];
     461              : 
     462              :    assign unshfl_d[31:0]      = (rs2_in[3])  ?  {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]}  :  unshfl4_d[31:0];
     463              : 
     464              : 
     465              : 
     466              : 
     467              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  XPERM          * * * * * * * * * * * * * * * * *
     468              : 
     469              : //
     470              : // These instructions operate on nibbles/bytes/half-words/words.
     471              : // rs1 is a vector of data words and rs2 is a vector of indices into rs1.
     472              : // The result of the instruction is the vector rs2 with each element replaced by the corresponding data word from rs1,
     473              : // or zero when the index in rs2 is out of bounds.
     474              : //
     475              : //   uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2)
     476              : //   {
     477              : //       uint_xlen_t r = 0;
     478              : //       uint_xlen_t sz = 1LL << sz_log2;
     479              : //       uint_xlen_t mask = (1LL << sz) - 1;
     480              : //       for (int i = 0; i < XLEN; i += sz)
     481              : //           { uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2;
     482              : //             if (pos < XLEN)
     483              : //                 r |= ((rs1 >> pos) & mask) << i;
     484              : //           }
     485              : //       return r;
     486              : //   }
     487              : //
     488              : // uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); }
     489              : // uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); }
     490              : // uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); }
     491              : // uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); }   Not part of RV32
     492              : //
     493              : // The xperm.[nbhw] instructions can be implemented with an XLEN/4-lane nibble-wide crossbar switch.
     494              : 
     495              : // *** XPERM_B ***
     496              : 
     497              :    // XLEN    = 32
     498              :    // SZ_LOG2 =  3
     499              :    // SZ      = 4'd8;
     500              :    // MASK    = ( 1 << 8 ) - 1
     501              :    //         = 8'hFF
     502              : 
     503              :    // integer                xperm_b_i;
     504              :    // logic        [31:0]    xperm_b_r;
     505              :    // logic        [3:0]     xperm_b_sz;
     506              :    // logic        [7:0]     xperm_b_mask;
     507              :    // logic        [31:0]    xperm_b_pos;
     508              :    //
     509              :    //
     510              :    // assign xperm_b_sz[3:0]        =  4'd8;
     511              :    // assign xperm_b_mask[7:0]      =  8'hff;
     512              :    //
     513              :    // always_comb
     514              :    //   begin
     515              :    //     xperm_b_r[31:0] = 32'b0;
     516              :    //
     517              :    //     for (xperm_b_i=0; xperm_b_i<32; xperm_b_i = xperm_b_i + xperm_b_sz)     // This code did not work...
     518              :    //       begin
     519              :    //         xperm_b_pos[31:0] = ( (rs2_in[31:0] >> xperm_b_i) & {24'h0,xperm_b_mask[7:0]} ) << 3;
     520              :    //         if (xperm_b_pos[31:0] < 32'd32)
     521              :    //            xperm_b_r[31:0] = xperm_b_r[31:0] | ( ((rs1_in[31:0] >> xperm_b_pos[4:0]) & {24'h0,xperm_b_mask[7:0]}) << xperm_b_i );
     522              :    //       end
     523              :    //   end
     524              : 
     525        20236 :    logic        [31:0]    xperm_n;   // Result built from eight 4-bit lanes
     526        59290 :    logic        [31:0]    xperm_b;   // Result built from four 8-bit lanes
     527         7230 :    logic        [31:0]    xperm_h;   // Result built from two 16-bit lanes
     528              :    // Nibble lanes: the low 3 bits of each rs2_in nibble, times 4, give the right-shift applied to rs1_in; the lane is forced to zero when bit 3 of that rs2_in nibble is set.
     529              :    assign xperm_n[03:00]         =  { 4{    ~rs2_in[03]     }} & 4'( (rs1_in[31:0] >> {rs2_in[02:00],2'b0}) &     4'hf );   // This is an 8:1 mux with qualified selects
     530              :    assign xperm_n[07:04]         =  { 4{    ~rs2_in[07]     }} & 4'( (rs1_in[31:0] >> {rs2_in[06:04],2'b0}) &     4'hf );
     531              :    assign xperm_n[11:08]         =  { 4{    ~rs2_in[11]     }} & 4'( (rs1_in[31:0] >> {rs2_in[10:08],2'b0}) &     4'hf );
     532              :    assign xperm_n[15:12]         =  { 4{    ~rs2_in[15]     }} & 4'( (rs1_in[31:0] >> {rs2_in[14:12],2'b0}) &     4'hf );
     533              :    assign xperm_n[19:16]         =  { 4{    ~rs2_in[19]     }} & 4'( (rs1_in[31:0] >> {rs2_in[18:16],2'b0}) &     4'hf );
     534              :    assign xperm_n[23:20]         =  { 4{    ~rs2_in[23]     }} & 4'( (rs1_in[31:0] >> {rs2_in[22:20],2'b0}) &     4'hf );
     535              :    assign xperm_n[27:24]         =  { 4{    ~rs2_in[27]     }} & 4'( (rs1_in[31:0] >> {rs2_in[26:24],2'b0}) &     4'hf );
     536              :    assign xperm_n[31:28]         =  { 4{    ~rs2_in[31]     }} & 4'( (rs1_in[31:0] >> {rs2_in[30:28],2'b0}) &     4'hf );
     537              :    // Byte lanes: the low 2 bits of each rs2_in byte, times 8, give the right-shift; the lane is forced to zero when any of the upper 6 bits of that rs2_in byte is set.
     538              :    assign xperm_b[07:00]         =  { 8{ ~(| rs2_in[07:02]) }} & 8'( (rs1_in[31:0] >> {rs2_in[01:00],3'b0}) &    8'hff );   // This is a 4:1 mux with qualified selects
     539              :    assign xperm_b[15:08]         =  { 8{ ~(| rs2_in[15:10]) }} & 8'( (rs1_in[31:0] >> {rs2_in[09:08],3'b0}) &    8'hff );
     540              :    assign xperm_b[23:16]         =  { 8{ ~(| rs2_in[23:18]) }} & 8'( (rs1_in[31:0] >> {rs2_in[17:16],3'b0}) &    8'hff );
     541              :    assign xperm_b[31:24]         =  { 8{ ~(| rs2_in[31:26]) }} & 8'( (rs1_in[31:0] >> {rs2_in[25:24],3'b0}) &    8'hff );
     542              :    // Half-word lanes: bit 0 of each rs2_in half-word selects a right-shift of 0 or 16; the lane is forced to zero when any of the upper 15 bits of that rs2_in half-word is set.
     543              :    assign xperm_h[15:00]         =  {16{ ~(| rs2_in[15:01]) }} & 16'( (rs1_in[31:0] >> {rs2_in[00]   ,4'b0}) & 16'hffff );   // This is a 2:1 mux with qualified selects
     544              :    assign xperm_h[31:16]         =  {16{ ~(| rs2_in[31:17]) }} & 16'( (rs1_in[31:0] >> {rs2_in[16]   ,4'b0}) & 16'hffff );
     545              : 
     546              : 
     547              : 
     548              : 
     549              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  CRC32, CRC32c  * * * * * * * * * * * * * * * * *
     550              : 
     551              :    // ***  computed from   https://crccalc.com  ***
     552              :    //
     553              :    // "a" is 8'h61 = 8'b0110_0001    (8'h61 ^ 8'hff = 8'h9e)
     554              :    //
     555              :    // Input must first be XORed with 32'hffff_ffff
     556              :    //
     557              :    //
     558              :    // CRC32
     559              :    //
     560              :    // Input    Output        Input      Output
     561              :    // -----   --------      --------   --------
     562              :    // "a"     e8b7be43      ffffff9e   174841bc
     563              :    // "aa"    078a19d7      ffff9e9e   f875e628
     564              :    // "aaaa"  ad98e545      9e9e9e9e   5267a1ba
     565              :    //
     566              :    //
     567              :    //
     568              :    // CRC32c
     569              :    //
     570              :    // Input    Output        Input      Output
     571              :    // -----   --------      --------   --------
     572              :    // "a"     c1d04330      ffffff9e   3e2fbccf
     573              :    // "aa"    f1f2dac2      ffff9e9e   0e0d253d
     574              :    // "aaaa"  6a52eeb0      9e9e9e9e   95ad114f
     575              : 
     576              : 
     577            0 :    logic                  crc32_all;   // OR of the six ap_crc32* / ap_crc32c* selects (assigned below)
     578          340 :    logic        [31:0]    crc32_poly_rev;
     579          340 :    logic        [31:0]    crc32c_poly_rev;
     580              :    integer                crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi;   // Loop counters, one per CRC variant
     581        47786 :    logic        [31:0]    crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd;   // Per-variant results that feed the bitmanip_d mux
     582              : 
     583              :    // crc32_all is high when any one of the six CRC select signals is high; it is one of the terms of bitmanip_sel_d.
     584              :    assign crc32_all              =  ap_crc32_b  | ap_crc32_h  | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w;
     585              :    // Constant polynomials in bit-reversed form, as used by the right-shifting update steps.
     586              :    assign crc32_poly_rev[31:0]   =  32'hEDB88320;    // bit reverse of 32'h04C11DB7
     587              :    assign crc32c_poly_rev[31:0]  =  32'h82F63B78;    // bit reverse of 32'h1EDC6F41
     588              : 
     589              : 
     590          340 :    always_comb
     591          340 :      begin
     592          340 :        crc32_bd[31:0]            =  rs1_in[31:0];
     593              :        // 8 update steps starting from rs1_in: shift right by one and XOR in crc32_poly_rev when the bit shifted out (bit 0) was 1.
     594          340 :        for (crc32_bi=0; crc32_bi<8; crc32_bi++)
     595         2720 :          begin
     596         2720 :             crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}});
     597              :          end      // FOR    crc32_bi
     598              :      end          // ALWAYS_COMB
     599              : 
     600              : 
     601          340 :    always_comb
     602          340 :      begin
     603          340 :        crc32_hd[31:0]            =  rs1_in[31:0];
     604              :        // 16 update steps starting from rs1_in: shift right by one and XOR in crc32_poly_rev when the bit shifted out (bit 0) was 1.
     605          340 :        for (crc32_hi=0; crc32_hi<16; crc32_hi++)
     606         5440 :          begin
     607         5440 :             crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}});
     608              :          end      // FOR    crc32_hi
     609              :      end          // ALWAYS_COMB
     610              : 
     611              : 
     612          340 :    always_comb
     613          340 :      begin
     614          340 :        crc32_wd[31:0]            =  rs1_in[31:0];
     615              :        // 32 update steps starting from rs1_in: shift right by one and XOR in crc32_poly_rev when the bit shifted out (bit 0) was 1.
     616          340 :        for (crc32_wi=0; crc32_wi<32; crc32_wi++)
     617        10880 :          begin
     618        10880 :             crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}});
     619              :          end      // FOR    crc32_wi
     620              :      end          // ALWAYS_COMB
     621              : 
     622              : 
     623              : 
     624              : 
     625          340 :    always_comb
     626          340 :      begin
     627          340 :        crc32c_bd[31:0]           =  rs1_in[31:0];
     628              :        // 8 update steps starting from rs1_in: shift right by one and XOR in crc32c_poly_rev when the bit shifted out (bit 0) was 1.
     629          340 :        for (crc32c_bi=0; crc32c_bi<8; crc32c_bi++)
     630         2720 :          begin
     631         2720 :             crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}});
     632              :          end      // FOR    crc32c_bi
     633              :      end          // ALWAYS_COMB
     634              : 
     635              : 
     636          340 :    always_comb
     637          340 :      begin
     638          340 :        crc32c_hd[31:0]           =  rs1_in[31:0];
     639              :        // 16 update steps starting from rs1_in: shift right by one and XOR in crc32c_poly_rev when the bit shifted out (bit 0) was 1.
     640          340 :        for (crc32c_hi=0; crc32c_hi<16; crc32c_hi++)
     641         5440 :          begin
     642         5440 :             crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}});
     643              :          end      // FOR    crc32c_hi
     644              :      end          // ALWAYS_COMB
     645              : 
     646              : 
     647          340 :    always_comb
     648          340 :      begin
     649          340 :        crc32c_wd[31:0]           =  rs1_in[31:0];
     650              :        // 32 update steps starting from rs1_in: shift right by one and XOR in crc32c_poly_rev when the bit shifted out (bit 0) was 1.
     651          340 :        for (crc32c_wi=0; crc32c_wi<32; crc32c_wi++)
     652        10880 :          begin
     653        10880 :             crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}});
     654              :          end      // FOR    crc32c_wi
     655              :      end          // ALWAYS_COMB
     656              : 
     657              : 
     658              : 
     659              : 
     660              : 
     661              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  BFP          * * * * * * * * * * * * * * * * * *
     662              : 
     663              : 
     664              :    // uint_xlen_t bfp(uint_xlen_t rs1, uint_xlen_t rs2)
     665              :    // {
     666              :    //    uint_xlen_t cfg = rs2 >> (XLEN/2);
     667              :    //    if ((cfg >> 30) == 2) cfg = cfg >> 16;
     668              :    //    int len          = (cfg >> 8) & (XLEN/2-1);
     669              :    //    int off          = cfg & (XLEN-1);
     670              :    //    len              = len ? len : XLEN/2;
     671              :    //    uint_xlen_t mask = slo(0, len) << off;
     672              :    //    uint_xlen_t data = rs2 << off;
     673              :    //    return (data & mask) | (rs1 & ~mask);
     674              :    // }
     675              : 
     676        27363 :    logic        [4:0]     bfp_len;             // Field length; bit 4 is set only when the 4-bit LEN field is zero
     677        27751 :    logic        [4:0]     bfp_off;             // Field offset (bit position of the field's LSB)
     678          276 :    logic        [31:0]    bfp_len_mask_;       // Zeros in the low bfp_len bits, ones above
     679          340 :    logic        [31:0]    bfp_off_mask_;       // Zeros in the low bfp_off bits, ones above
     680        25762 :    logic        [15:0]    bfp_preshift_data;   // rs2_in[15:0] trimmed to the low bfp_len bits
     681         4976 :    logic        [31:0]    bfp_shift_data;      // Trimmed field data moved up to the offset
     682        10408 :    logic        [31:0]    bfp_shift_mask;      // Ones where rs1_in is kept, zeros over the inserted field
     683        19594 :    logic        [31:0]    bfp_result_d;
     684              : 
     685              :    // Length and offset are taken straight from rs2_in: LEN = rs2_in[27:24], OFF = rs2_in[20:16].
     686              :    assign bfp_len[3:0]           =  rs2_in[27:24];
     687              :    assign bfp_len[4]             = (bfp_len[3:0] == 4'b0);   // If LEN field is zero, then LEN=16
     688              :    assign bfp_off[4:0]           =  rs2_in[20:16];
     689              :    // Both masks are all-ones shifted left, so their low bfp_len / bfp_off bits are zero; inverting bfp_len_mask_ yields bfp_len low-order ones.
     690              :    assign bfp_len_mask_[31:0]    =  32'hffff_ffff  <<  bfp_len[4:0];
     691              :    assign bfp_off_mask_[31:0]    =  32'hffff_ffff  <<  bfp_off[4:0];
     692              :    assign bfp_preshift_data[15:0]=  rs2_in[15:0] & ~bfp_len_mask_[15:0];
     693              :    // bfp_shift_mask is one above the field (bfp_len_mask_ moved up by the offset, bits past 31 dropped) and below it (~bfp_off_mask_), zero across the field itself.
     694              :    assign bfp_shift_data[31:0]   = {16'b0,bfp_preshift_data[15:0]}  <<  bfp_off[4:0];
     695              :    assign bfp_shift_mask[31:0]   = (bfp_len_mask_[31:0]             <<  bfp_off[4:0]) | ~bfp_off_mask_[31:0];
     696              :    // Merge: shifted field data inside the field, rs1_in bits everywhere else.
     697              :    assign bfp_result_d[31:0]     = bfp_shift_data[31:0] | (rs1_in[31:0] & bfp_shift_mask[31:0]);
     698              : 
     699              : 
     700              : 
     701              : 
     702              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  Common logic * * * * * * * * * * * * * * * * * *
     703              : 
     704              : 
     705              :    assign bitmanip_sel_d         =  ap_bcompress | ap_bdecompress | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp | ap_xperm_n | ap_xperm_b | ap_xperm_h;
     706              :    // AND-OR mux: every candidate result is gated by its own select and the gated terms are ORed, so bitmanip_d is all-zero when no select (and hence bitmanip_sel_d) is asserted.
     707              :    assign bitmanip_d[31:0]       = ( {32{ap_bcompress}}    &       bcompress_d[31:0]   ) |
     708              :                                    ( {32{ap_bdecompress}}  &       bdecompress_d[31:0] ) |
     709              :                                    ( {32{ap_clmul}}        &       clmul_raw_d[31:0]   ) |   // low 32 bits of clmul_raw_d
     710              :                                    ( {32{ap_clmulh}}       & {1'b0,clmul_raw_d[62:32]} ) |   // bits 62:32, zero-extended to 32 bits
     711              :                                    ( {32{ap_clmulr}}       &       clmul_raw_d[62:31]  ) |   // bits 62:31
     712              :                                    ( {32{ap_grev}}         &       grev_d[31:0]        ) |
     713              :                                    ( {32{ap_gorc}}         &       gorc_d[31:0]        ) |
     714              :                                    ( {32{ap_shfl}}         &       shfl_d[31:0]        ) |
     715              :                                    ( {32{ap_unshfl}}       &       unshfl_d[31:0]      ) |
     716              :                                    ( {32{ap_crc32_b}}      &       crc32_bd[31:0]      ) |
     717              :                                    ( {32{ap_crc32_h}}      &       crc32_hd[31:0]      ) |
     718              :                                    ( {32{ap_crc32_w}}      &       crc32_wd[31:0]      ) |
     719              :                                    ( {32{ap_crc32c_b}}     &       crc32c_bd[31:0]     ) |
     720              :                                    ( {32{ap_crc32c_h}}     &       crc32c_hd[31:0]     ) |
     721              :                                    ( {32{ap_crc32c_w}}     &       crc32c_wd[31:0]     ) |
     722              :                                    ( {32{ap_bfp}}          &       bfp_result_d[31:0]  ) |
     723              :                                    ( {32{ap_xperm_n}}      &       xperm_n[31:0]       ) |
     724              :                                    ( {32{ap_xperm_b}}      &       xperm_b[31:0]       ) |
     725              :                                    ( {32{ap_xperm_h}}      &       xperm_h[31:0]       );
     726              : 
     727              : 
     728              : 
     729              :    rvdffe #(33) i_bitmanip_ff    (.*, .clk(clk),  .din({bitmanip_sel_d,bitmanip_d[31:0]}),   .dout({bitmanip_sel_x,bitmanip_x[31:0]}),   .en(bit_x_enable));   // 33-bit {select, result} bundle passed from the _d signals to the _x signals; rvdffe is defined elsewhere - presumably an enable-gated flop, TODO confirm
     730              : 
     731              : 
     732              : 
     733              : 
     734              :    assign result_x[31:0]         =  ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32]    ) |   // upper product word when neither bitmanip_sel_x nor low_x is set
     735              :                                     ( {32{~bitmanip_sel_x &  low_x}} & prod_x[31:0]     ) |   // lower product word when low_x is set and bitmanip_sel_x is not
     736              :                                                                        bitmanip_x[31:0];      // ORed in without a qualifier: bitmanip_d is all-zero whenever bitmanip_sel_d is 0 and both go through the same rvdffe instance
     737              : 
     738              : 
     739              : 
     740              : endmodule  // el2_exu_mul_ctl