Project Full coverage report
Current view: Cores-VeeR-EL2—Cores-VeeR-EL2—design—exu—el2_exu_mul_ctl.sv Coverage Hit Total
Test Date: 19-09-2024 Toggle 75.0% 57 76
Test: all Branch 100.0% 60 60

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : // Copyright 2020 Western Digital Corporation or its affiliates.
       3              : //
       4              : // Licensed under the Apache License, Version 2.0 (the "License");
       5              : // you may not use this file except in compliance with the License.
       6              : // You may obtain a copy of the License at
       7              : //
       8              : // http://www.apache.org/licenses/LICENSE-2.0
       9              : //
      10              : // Unless required by applicable law or agreed to in writing, software
      11              : // distributed under the License is distributed on an "AS IS" BASIS,
      12              : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      13              : // See the License for the specific language governing permissions and
      14              : // limitations under the License.
      15              : 
      16              : 
      17              : module el2_exu_mul_ctl
      18              : import el2_pkg::*;
      19              : #(
      20              : `include "el2_param.vh"
      21              :  )
      22              :   (
      23     61843973 :    input logic          clk,              // Top level clock
      24          317 :    input logic          rst_l,            // Reset
      25            0 :    input logic          scan_mode,        // Scan mode
      26              : 
      27            0 :    input el2_mul_pkt_t mul_p,            // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result}
      28              : 
      29        20188 :    input logic [31:0]   rs1_in,           // A operand
      30        22747 :    input logic [31:0]   rs2_in,           // B operand
      31              : 
      32              : 
      33        17528 :    output logic [31:0]  result_x          // Result
      34              :   );
      35              : 
      36              : 
      37       251048 :    logic                mul_x_enable;
      38       251048 :    logic                bit_x_enable;
      39        16868 :    logic signed [32:0]  rs1_ext_in;
      40        12631 :    logic signed [32:0]  rs2_ext_in;
      41        34802 :    logic        [65:0]  prod_x;
      42        16705 :    logic                low_x;
      43              : 
      44              : 
      45              : 
      46              :    // *** Start - BitManip ***
      47              : 
      48         2770 :    logic                bitmanip_sel_d;
      49         1428 :    logic                bitmanip_sel_x;
      50          242 :    logic        [31:0]  bitmanip_d;
      51          224 :    logic        [31:0]  bitmanip_x;
      52              : 
      53              : 
      54              : 
      55              :    // ZBE
      56            0 :    logic                ap_bcompress;
      57            0 :    logic                ap_bdecompress;
      58              : 
      59              :    // ZBC
      60          852 :    logic                ap_clmul;
      61          888 :    logic                ap_clmulh;
      62         1030 :    logic                ap_clmulr;
      63              : 
      64              :    // ZBP
      65            0 :    logic                ap_grev;
      66            0 :    logic                ap_gorc;
      67            0 :    logic                ap_shfl;
      68            0 :    logic                ap_unshfl;
      69            0 :    logic                ap_xperm_n;
      70            0 :    logic                ap_xperm_b;
      71            0 :    logic                ap_xperm_h;
      72              : 
      73              :    // ZBR
      74            0 :    logic                ap_crc32_b;
      75            0 :    logic                ap_crc32_h;
      76            0 :    logic                ap_crc32_w;
      77            0 :    logic                ap_crc32c_b;
      78            0 :    logic                ap_crc32c_h;
      79            0 :    logic                ap_crc32c_w;
      80              : 
      81              :    // ZBF
      82            0 :    logic                ap_bfp;
      83              : 
      84              : 
      85              :    if (pt.BITMANIP_ZBE == 1)   // ZBE selected by pt: forward the bcompress/bdecompress selects from the mul_p packet
      86              :      begin
      87              :        assign ap_bcompress    =  mul_p.bcompress;
      88              :        assign ap_bdecompress  =  mul_p.bdecompress;
      89              :      end
      90              :    else                        // ZBE not selected: both selects are constant 0
      91              :      begin
      92              :        assign ap_bcompress    =  1'b0;
      93              :        assign ap_bdecompress  =  1'b0;
      94              :      end
      95              : 
      96              :    if (pt.BITMANIP_ZBC == 1)   // ZBC selected by pt: forward the clmul/clmulh/clmulr selects from the mul_p packet
      97              :      begin
      98              :        assign ap_clmul        =  mul_p.clmul;
      99              :        assign ap_clmulh       =  mul_p.clmulh;
     100              :        assign ap_clmulr       =  mul_p.clmulr;
     101              :      end
     102              :    else                        // ZBC not selected: all three selects are constant 0
     103              :      begin
     104              :        assign ap_clmul        =  1'b0;
     105              :        assign ap_clmulh       =  1'b0;
     106              :        assign ap_clmulr       =  1'b0;
     107              :      end
     108              : 
     109              :    if (pt.BITMANIP_ZBP == 1)   // ZBP selected by pt: forward the grev/gorc/shfl/unshfl/xperm selects from the mul_p packet
     110              :      begin
     111              :        assign ap_grev         =  mul_p.grev;
     112              :        assign ap_gorc         =  mul_p.gorc;
     113              :        assign ap_shfl         =  mul_p.shfl;
     114              :        assign ap_unshfl       =  mul_p.unshfl;
     115              :        assign ap_xperm_n      =  mul_p.xperm_n;
     116              :        assign ap_xperm_b      =  mul_p.xperm_b;
     117              :        assign ap_xperm_h      =  mul_p.xperm_h;
     118              :      end
     119              :    else                        // ZBP not selected: all seven selects are constant 0
     120              :      begin
     121              :        assign ap_grev         =  1'b0;
     122              :        assign ap_gorc         =  1'b0;
     123              :        assign ap_shfl         =  1'b0;
     124              :        assign ap_unshfl       =  1'b0;
     125              :        assign ap_xperm_n      =  1'b0;
     126              :        assign ap_xperm_b      =  1'b0;
     127              :        assign ap_xperm_h      =  1'b0;
     128              :      end
     129              : 
     130              :    if (pt.BITMANIP_ZBR == 1)   // ZBR selected by pt: forward the six crc32/crc32c selects from the mul_p packet
     131              :      begin
     132              :        assign ap_crc32_b      =  mul_p.crc32_b;
     133              :        assign ap_crc32_h      =  mul_p.crc32_h;
     134              :        assign ap_crc32_w      =  mul_p.crc32_w;
     135              :        assign ap_crc32c_b     =  mul_p.crc32c_b;
     136              :        assign ap_crc32c_h     =  mul_p.crc32c_h;
     137              :        assign ap_crc32c_w     =  mul_p.crc32c_w;
     138              :      end
     139              :    else                        // ZBR not selected: all six selects are constant 0
     140              :      begin
     141              :        assign ap_crc32_b      =  1'b0;
     142              :        assign ap_crc32_h      =  1'b0;
     143              :        assign ap_crc32_w      =  1'b0;
     144              :        assign ap_crc32c_b     =  1'b0;
     145              :        assign ap_crc32c_h     =  1'b0;
     146              :        assign ap_crc32c_w     =  1'b0;
     147              :      end
     148              : 
     149              :    if (pt.BITMANIP_ZBF == 1)   // ZBF selected by pt: forward the bfp select from the mul_p packet
     150              :      begin
     151              :        assign ap_bfp          =  mul_p.bfp;
     152              :      end
     153              :    else                        // ZBF not selected: the select is constant 0
     154              :      begin
     155              :        assign ap_bfp          =  1'b0;
     156              :      end
     157              : 
     158              : 
     159              :    // *** End   - BitManip ***
     160              : 
     161              : 
     162              : 
     163              :    assign mul_x_enable           =  mul_p.valid;   // Both enables are driven directly by the packet valid bit
     164              :    assign bit_x_enable           =  mul_p.valid;
     165              :    // Widen each operand to 33 bits: bit 32 copies the operand MSB only when the packet marks that operand as signed, otherwise it is 0
     166              :    assign rs1_ext_in[32]         =  mul_p.rs1_sign & rs1_in[31];
     167              :    assign rs2_ext_in[32]         =  mul_p.rs2_sign & rs2_in[31];
     168              :    // The low 32 bits of each extended operand are the input operand unchanged
     169              :    assign rs1_ext_in[31:0]       =  rs1_in[31:0];
     170              :    assign rs2_ext_in[31:0]       =  rs2_in[31:0];
     171              : 
     172              : 
     173              : 
     174              :    // --------------------------- Multiply       ----------------------------------
     175              : 
     176              : 
     177        12410 :    logic signed [32:0]  rs1_x;
     178        11050 :    logic signed [32:0]  rs2_x;
     179              :    // Capture the extended operands (mul_p.low rides along with rs1) in rvdffe instances gated by mul_x_enable; rvdffe is defined outside this file - presumably an enable flop, TODO confirm
     180              :    rvdffe #(34) i_a_x_ff         (.*, .clk(clk),  .din({mul_p.low,rs1_ext_in[32:0]}),        .dout({low_x,rs1_x[32:0]}),                 .en(mul_x_enable));
     181              :    rvdffe #(33) i_b_x_ff         (.*, .clk(clk),  .din(           rs2_ext_in[32:0] ),        .dout(       rs2_x[32:0] ),                 .en(mul_x_enable));
     182              : 
     183              :    // Both factors are declared signed, so this is a signed 33x33 multiply evaluated at the 66-bit width of prod_x
     184              :    assign prod_x[65:0]           =  rs1_x  *  rs2_x;
     185              : 
     186              : 
     187              : 
     188              : 
     189              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  BCOMPRESS, BDECOMPRESS * * * * * * * * * * * * *
     190              : 
     191              : 
     192              :    // *** BCOMPRESS == "gather"  ***
     193              : 
     194         1362 :    logic        [31:0]    bcompress_d;
     195        41861 :    logic                  bcompress_test_bit_d;
     196              :    integer                bcompress_i, bcompress_j;
     197              : 
     198              : 
     199          318 :    always_comb
     200          318 :      begin
     201              :        // Gather: scan rs2_in from bit 0 upward and pack the rs1_in bits found at its set positions into consecutive low bits of bcompress_d
     202          318 :        bcompress_j                             =      0;   // next output bit position to fill
     203          318 :        bcompress_test_bit_d                    =   1'b0;
     204          318 :        bcompress_d[31:0]                       =  32'b0;   // output positions that are never filled stay 0
     205              : 
     206          318 :        for (bcompress_i=0; bcompress_i<32; bcompress_i++)
     207        10176 :          begin
     208        10176 :              bcompress_test_bit_d              =  rs2_in[bcompress_i];   // mask bit for source position bcompress_i
     209    438942656 :              if (bcompress_test_bit_d)
     210       715744 :                begin
     211       715744 :                   bcompress_d[bcompress_j]     =  rs1_in[bcompress_i];   // selected source bit goes to the next free low position
     212       715744 :                   bcompress_j                  =  bcompress_j + 1;
     213              :                end  // IF  bcompress_test_bit
     214              :          end        // FOR bcompress_i
     215              :      end            // ALWAYS_COMB
     216              : 
     217              : 
     218              : 
     219              :    // *** BDECOMPRESS == "scatter" ***
     220              : 
     221         7191 :    logic        [31:0]    bdecompress_d;
     222        41861 :    logic                  bdecompress_test_bit_d;
     223              :    integer                bdecompress_i, bdecompress_j;
     224              : 
     225              : 
     226          318 :    always_comb
     227          318 :      begin
     228              :        // Scatter: scan rs2_in from bit 0 upward and place successive low bits of rs1_in at its set positions in bdecompress_d
     229          318 :        bdecompress_j                           =      0;   // index of the next rs1_in bit to consume
     230          318 :        bdecompress_test_bit_d                  =   1'b0;
     231          318 :        bdecompress_d[31:0]                     =  32'b0;   // positions where rs2_in is 0 stay 0
     232              : 
     233          318 :        for (bdecompress_i=0; bdecompress_i<32; bdecompress_i++)
     234        10176 :          begin
     235        10176 :              bdecompress_test_bit_d            =  rs2_in[bdecompress_i];   // mask bit for destination position bdecompress_i
     236    438942656 :              if (bdecompress_test_bit_d)
     237       715744 :                begin
     238       715744 :                   bdecompress_d[bdecompress_i] =  rs1_in[bdecompress_j];   // next unused source bit goes to this masked position
     239       715744 :                   bdecompress_j                =  bdecompress_j + 1;
     240              :                end  // IF  bdecompress_test_bit
     241              :          end        // FOR bdecompress_i
     242              :      end            // ALWAYS_COMB
     243              : 
     244              : 
     245              : 
     246              : 
     247              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  CLMUL, CLMULH, CLMULR  * * * * * * * * * * * * *
     248              : 
     249        57611 :    logic        [62:0]    clmul_raw_d;
     250              : 
     251              :    // Carry-less product: for every set bit i of rs2_in, rs1_in shifted left by i is XORed into the result; 63 bits hold the largest term (32 bits shifted by 31)
     252              :    assign clmul_raw_d[62:0]      = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0]      } ) ^
     253              :                                    ( {63{rs2_in[01]}} & {30'b0,rs1_in[31:0], 1'b0} ) ^
     254              :                                    ( {63{rs2_in[02]}} & {29'b0,rs1_in[31:0], 2'b0} ) ^
     255              :                                    ( {63{rs2_in[03]}} & {28'b0,rs1_in[31:0], 3'b0} ) ^
     256              :                                    ( {63{rs2_in[04]}} & {27'b0,rs1_in[31:0], 4'b0} ) ^
     257              :                                    ( {63{rs2_in[05]}} & {26'b0,rs1_in[31:0], 5'b0} ) ^
     258              :                                    ( {63{rs2_in[06]}} & {25'b0,rs1_in[31:0], 6'b0} ) ^
     259              :                                    ( {63{rs2_in[07]}} & {24'b0,rs1_in[31:0], 7'b0} ) ^
     260              :                                    ( {63{rs2_in[08]}} & {23'b0,rs1_in[31:0], 8'b0} ) ^
     261              :                                    ( {63{rs2_in[09]}} & {22'b0,rs1_in[31:0], 9'b0} ) ^
     262              :                                    ( {63{rs2_in[10]}} & {21'b0,rs1_in[31:0],10'b0} ) ^
     263              :                                    ( {63{rs2_in[11]}} & {20'b0,rs1_in[31:0],11'b0} ) ^
     264              :                                    ( {63{rs2_in[12]}} & {19'b0,rs1_in[31:0],12'b0} ) ^
     265              :                                    ( {63{rs2_in[13]}} & {18'b0,rs1_in[31:0],13'b0} ) ^
     266              :                                    ( {63{rs2_in[14]}} & {17'b0,rs1_in[31:0],14'b0} ) ^
     267              :                                    ( {63{rs2_in[15]}} & {16'b0,rs1_in[31:0],15'b0} ) ^
     268              :                                    ( {63{rs2_in[16]}} & {15'b0,rs1_in[31:0],16'b0} ) ^
     269              :                                    ( {63{rs2_in[17]}} & {14'b0,rs1_in[31:0],17'b0} ) ^
     270              :                                    ( {63{rs2_in[18]}} & {13'b0,rs1_in[31:0],18'b0} ) ^
     271              :                                    ( {63{rs2_in[19]}} & {12'b0,rs1_in[31:0],19'b0} ) ^
     272              :                                    ( {63{rs2_in[20]}} & {11'b0,rs1_in[31:0],20'b0} ) ^
     273              :                                    ( {63{rs2_in[21]}} & {10'b0,rs1_in[31:0],21'b0} ) ^
     274              :                                    ( {63{rs2_in[22]}} & { 9'b0,rs1_in[31:0],22'b0} ) ^
     275              :                                    ( {63{rs2_in[23]}} & { 8'b0,rs1_in[31:0],23'b0} ) ^
     276              :                                    ( {63{rs2_in[24]}} & { 7'b0,rs1_in[31:0],24'b0} ) ^
     277              :                                    ( {63{rs2_in[25]}} & { 6'b0,rs1_in[31:0],25'b0} ) ^
     278              :                                    ( {63{rs2_in[26]}} & { 5'b0,rs1_in[31:0],26'b0} ) ^
     279              :                                    ( {63{rs2_in[27]}} & { 4'b0,rs1_in[31:0],27'b0} ) ^
     280              :                                    ( {63{rs2_in[28]}} & { 3'b0,rs1_in[31:0],28'b0} ) ^
     281              :                                    ( {63{rs2_in[29]}} & { 2'b0,rs1_in[31:0],29'b0} ) ^
     282              :                                    ( {63{rs2_in[30]}} & { 1'b0,rs1_in[31:0],30'b0} ) ^
     283              :                                    ( {63{rs2_in[31]}} & {      rs1_in[31:0],31'b0} );
     284              : 
     285              : 
     286              : 
     287              : 
     288              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  GREV         * * * * * * * * * * * * * * * * * *
     289              : 
     290              :    // uint32_t grev32(uint32_t rs1, uint32_t rs2)
     291              :    // {
     292              :    //     uint32_t x = rs1;
     293              :    //     int shamt = rs2 & 31;
     294              :    //
     295              :    //     if (shamt &  1)  x = ( (x & 0x55555555) <<  1) | ( (x & 0xAAAAAAAA) >>  1);
     296              :    //     if (shamt &  2)  x = ( (x & 0x33333333) <<  2) | ( (x & 0xCCCCCCCC) >>  2);
     297              :    //     if (shamt &  4)  x = ( (x & 0x0F0F0F0F) <<  4) | ( (x & 0xF0F0F0F0) >>  4);
     298              :    //     if (shamt &  8)  x = ( (x & 0x00FF00FF) <<  8) | ( (x & 0xFF00FF00) >>  8);
     299              :    //     if (shamt & 16)  x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
     300              :    //
     301              :    //     return x;
     302              :    //  }
     303              : 
     304              : 
     305        29061 :    logic        [31:0]    grev1_d;
     306        26044 :    logic        [31:0]    grev2_d;
     307        24671 :    logic        [31:0]    grev4_d;
     308        46661 :    logic        [31:0]    grev8_d;
     309        35104 :    logic        [31:0]    grev_d;
     310              :    // Five cascaded swap stages, one per bit of rs2_in[4:0]; a stage passes its input through unchanged when its control bit is 0
     311              :    // rs2_in[0]: swap the two bits of every adjacent bit pair of rs1_in
     312              :    assign grev1_d[31:0]       = (rs2_in[0])  ?  {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
     313              :                                                  rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
     314              :                                                  rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
     315              :                                                  rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]}  :  rs1_in[31:0];
     316              :    // rs2_in[1]: swap adjacent 2-bit groups
     317              :    assign grev2_d[31:0]       = (rs2_in[1])  ?  {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26],
     318              :                                                  grev1_d[21:20],grev1_d[23:22],grev1_d[17:16],grev1_d[19:18],
     319              :                                                  grev1_d[13:12],grev1_d[15:14],grev1_d[09:08],grev1_d[11:10],
     320              :                                                  grev1_d[05:04],grev1_d[07:06],grev1_d[01:00],grev1_d[03:02]}  :  grev1_d[31:0];
     321              :    // rs2_in[2]: swap the two nibbles of every byte
     322              :    assign grev4_d[31:0]       = (rs2_in[2])  ?  {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20],
     323              :                                                  grev2_d[11:08],grev2_d[15:12],grev2_d[03:00],grev2_d[07:04]}  :  grev2_d[31:0];
     324              :    // rs2_in[3]: swap the two bytes of every halfword
     325              :    assign grev8_d[31:0]       = (rs2_in[3])  ?  {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]}  :  grev4_d[31:0];
     326              :    // rs2_in[4]: swap the two halfwords
     327              :    assign grev_d[31:0]        = (rs2_in[4])  ?  {grev8_d[15:00],grev8_d[31:16]}  :  grev8_d[31:0];
     328              : 
     329              : 
     330              : 
     331              : 
     332              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  GORC         * * * * * * * * * * * * * * * * * *
     333              : 
     334              :    // uint32_t gorc32(uint32_t rs1, uint32_t rs2)
     335              :    // {
     336              :    //     uint32_t x = rs1;
     337              :    //     int shamt = rs2 & 31;
     338              :    //
     339              :    //     if (shamt &  1)  x |= ( (x & 0x55555555) <<  1) | ( (x & 0xAAAAAAAA) >>  1);
     340              :    //     if (shamt &  2)  x |= ( (x & 0x33333333) <<  2) | ( (x & 0xCCCCCCCC) >>  2);
     341              :    //     if (shamt &  4)  x |= ( (x & 0x0F0F0F0F) <<  4) | ( (x & 0xF0F0F0F0) >>  4);
     342              :    //     if (shamt &  8)  x |= ( (x & 0x00FF00FF) <<  8) | ( (x & 0xFF00FF00) >>  8);
     343              :    //     if (shamt & 16)  x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
     344              :    //
     345              :    //     return x;
     346              :    //  }
     347              : 
     348              : 
     349        31102 :    logic        [31:0]    gorc1_d;
     350        32894 :    logic        [31:0]    gorc2_d;
     351        35793 :    logic        [31:0]    gorc4_d;
     352        80845 :    logic        [31:0]    gorc8_d;
     353        52895 :    logic        [31:0]    gorc_d;
     354              :    // Five cascaded stages, one per bit of rs2_in[4:0]; each ORs its input with a swapped copy of that input, the copy being masked to 0 when the control bit is 0
     355              :    // rs2_in[0]: OR in the copy with the two bits of every adjacent bit pair swapped
     356              :    assign gorc1_d[31:0]       = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
     357              :                                                      rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
     358              :                                                      rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
     359              :                                                      rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} ) | rs1_in[31:0];
     360              :    // rs2_in[1]: OR in the copy with adjacent 2-bit groups swapped
     361              :    assign gorc2_d[31:0]       = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26],
     362              :                                                      gorc1_d[21:20],gorc1_d[23:22],gorc1_d[17:16],gorc1_d[19:18],
     363              :                                                      gorc1_d[13:12],gorc1_d[15:14],gorc1_d[09:08],gorc1_d[11:10],
     364              :                                                      gorc1_d[05:04],gorc1_d[07:06],gorc1_d[01:00],gorc1_d[03:02]} ) | gorc1_d[31:0];
     365              :    // rs2_in[2]: OR in the copy with the two nibbles of every byte swapped
     366              :    assign gorc4_d[31:0]       = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20],
     367              :                                                      gorc2_d[11:08],gorc2_d[15:12],gorc2_d[03:00],gorc2_d[07:04]} ) | gorc2_d[31:0];
     368              :    // rs2_in[3]: OR in the copy with the two bytes of every halfword swapped
     369              :    assign gorc8_d[31:0]       = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0];
     370              :    // rs2_in[4]: OR in the copy with the two halfwords swapped
     371              :    assign gorc_d[31:0]        = ( {32{rs2_in[4]}} & {gorc8_d[15:00],gorc8_d[31:16]} ) | gorc8_d[31:0];
     372              : 
     373              : 
     374              : 
     375              : 
     376              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  SHFL, UNSHFL * * * * * * * * * * * * * * * * * *
     377              : 
     378              :    // uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N)
     379              :    // {
     380              :    //     uint32_t x  = src & ~(maskL | maskR);
     381              :    //     x          |= ((src << N) & maskL) | ((src >> N) & maskR);
     382              :    //     return x;
     383              :    // }
     384              :    //
     385              :    //
     386              :    //
     387              :    // uint32_t shfl32(uint32_t rs1, uint32_t rs2)
     388              :    // {
     389              :    //     uint32_t x = rs1;
     390              :    //     int shamt = rs2 & 15;
     391              :    //
     392              :    //     if (shamt & 8)  x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
     393              :    //     if (shamt & 4)  x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
     394              :    //     if (shamt & 2)  x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
     395              :    //     if (shamt & 1)  x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
     396              :    //
     397              :    //     return x;
     398              :    // }
     399              : 
     400              : 
     401        20634 :    logic        [31:0]    shfl8_d;
     402        37767 :    logic        [31:0]    shfl4_d;
     403        37878 :    logic        [31:0]    shfl2_d;
     404        35804 :    logic        [31:0]    shfl_d;
     405              : 
     406              :    // Four cascaded stages applied from coarse to fine (rs2_in[3] first, rs2_in[0] last); a stage passes its input through unchanged when its control bit is 0
     407              :    // rs2_in[3]: swap the two middle bytes of the word
     408              :    assign shfl8_d[31:0]       = (rs2_in[3])  ?  {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]}      :  rs1_in[31:0];
     409              :    // rs2_in[2]: swap the two middle nibbles of every halfword
     410              :    assign shfl4_d[31:0]       = (rs2_in[2])  ?  {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16],
     411              :                                                  shfl8_d[15:12],shfl8_d[07:04],shfl8_d[11:08],shfl8_d[03:00]}  :  shfl8_d[31:0];
     412              :    // rs2_in[1]: swap the two middle 2-bit groups of every byte
     413              :    assign shfl2_d[31:0]       = (rs2_in[1])  ?  {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24],
     414              :                                                  shfl4_d[23:22],shfl4_d[19:18],shfl4_d[21:20],shfl4_d[17:16],
     415              :                                                  shfl4_d[15:14],shfl4_d[11:10],shfl4_d[13:12],shfl4_d[09:08],
     416              :                                                  shfl4_d[07:06],shfl4_d[03:02],shfl4_d[05:04],shfl4_d[01:00]}  :  shfl4_d[31:0];
     417              :    // rs2_in[0]: swap the two middle bits of every nibble
     418              :    assign shfl_d[31:0]        = (rs2_in[0])  ?  {shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24],
     419              :                                                  shfl2_d[23],shfl2_d[21],shfl2_d[22],shfl2_d[20],shfl2_d[19],shfl2_d[17],shfl2_d[18],shfl2_d[16],
     420              :                                                  shfl2_d[15],shfl2_d[13],shfl2_d[14],shfl2_d[12],shfl2_d[11],shfl2_d[09],shfl2_d[10],shfl2_d[08],
     421              :                                                  shfl2_d[07],shfl2_d[05],shfl2_d[06],shfl2_d[04],shfl2_d[03],shfl2_d[01],shfl2_d[02],shfl2_d[00]}  :  shfl2_d[31:0];
     422              : 
     423              : 
     424              : 
     425              : 
     426              :    // uint32_t unshfl32(uint32_t rs1, uint32_t rs2)
     427              :    // {
     428              :    //     uint32_t x = rs1;
     429              :    //     int shamt = rs2 & 15;
     430              :    //
     431              :    //     if (shamt & 1)  x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
     432              :    //     if (shamt & 2)  x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
     433              :    //     if (shamt & 4)  x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
     434              :    //     if (shamt & 8)  x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
     435              :    //
     436              :    //     return x;
     437              :    // }
     438              : 
     439              : 
     440        20262 :    logic        [31:0]    unshfl1_d;
     441        20450 :    logic        [31:0]    unshfl2_d;
     442        39731 :    logic        [31:0]    unshfl4_d;
     443        32020 :    logic        [31:0]    unshfl_d;
     444              :    // Four cascaded stages applied from fine to coarse (rs2_in[0] first, rs2_in[3] last); a stage passes its input through unchanged when its control bit is 0
     445              :    // rs2_in[0]: swap the two middle bits of every nibble
     446              :    assign unshfl1_d[31:0]     = (rs2_in[0])  ?  {rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24],
     447              :                                                  rs1_in[23],rs1_in[21],rs1_in[22],rs1_in[20],rs1_in[19],rs1_in[17],rs1_in[18],rs1_in[16],
     448              :                                                  rs1_in[15],rs1_in[13],rs1_in[14],rs1_in[12],rs1_in[11],rs1_in[09],rs1_in[10],rs1_in[08],
     449              :                                                  rs1_in[07],rs1_in[05],rs1_in[06],rs1_in[04],rs1_in[03],rs1_in[01],rs1_in[02],rs1_in[00]}  :  rs1_in[31:0];
     450              :    // rs2_in[1]: swap the two middle 2-bit groups of every byte
     451              :    assign unshfl2_d[31:0]     = (rs2_in[1])  ?  {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24],
     452              :                                                  unshfl1_d[23:22],unshfl1_d[19:18],unshfl1_d[21:20],unshfl1_d[17:16],
     453              :                                                  unshfl1_d[15:14],unshfl1_d[11:10],unshfl1_d[13:12],unshfl1_d[09:08],
     454              :                                                  unshfl1_d[07:06],unshfl1_d[03:02],unshfl1_d[05:04],unshfl1_d[01:00]}  :  unshfl1_d[31:0];
     455              :    // rs2_in[2]: swap the two middle nibbles of every halfword
     456              :    assign unshfl4_d[31:0]     = (rs2_in[2])  ?  {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16],
     457              :                                                  unshfl2_d[15:12],unshfl2_d[07:04],unshfl2_d[11:08],unshfl2_d[03:00]}  :  unshfl2_d[31:0];
     458              :    // rs2_in[3]: swap the two middle bytes of the word
     459              :    assign unshfl_d[31:0]      = (rs2_in[3])  ?  {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]}  :  unshfl4_d[31:0];
     460              : 
     461              : 
     462              : 
     463              : 
     464              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  XPERM          * * * * * * * * * * * * * * * * *
     465              : 
     466              : //
     467              : // These instructions operate on nibbles/bytes/half-words/words.
     468              : // rs1 is a vector of data words and rs2 is a vector of indices into rs1.
     469              : // The result of the instruction is the vector rs2 with each element replaced by the corresponding data word from rs1,
     470              : // or zero when the index in rs2 is out of bounds.
     471              : //
     472              : //   uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2)
     473              : //   {
     474              : //       uint_xlen_t r = 0;
     475              : //       uint_xlen_t sz = 1LL << sz_log2;
     476              : //       uint_xlen_t mask = (1LL << sz) - 1;
     477              : //       for (int i = 0; i < XLEN; i += sz)
     478              : //           { uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2;
     479              : //             if (pos < XLEN)
     480              : //                 r |= ((rs1 >> pos) & mask) << i;
     481              : //           }
     482              : //       return r;
     483              : //   }
     484              : //
     485              : // uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); }
     486              : // uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); }
     487              : // uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); }
     488              : // uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); }   Not part of RV32
     489              : //
     490              : // The xperm.[nbhw] instructions can be implemented with an XLEN/4-lane nibble-wide crossbar switch.
     491              : 
     492              : // *** XPERM_B ***
     493              : 
     494              :    // XLEN    = 32
     495              :    // SZ_LOG2 =  3
     496              :    // SZ      = 4'd8;
     497              :    // MASK    = ( 1 << 8 ) - 1
     498              :    //         = 8'hFF
     499              : 
     500              :    // integer                xperm_b_i;
     501              :    // logic        [31:0]    xperm_b_r;
     502              :    // logic        [3:0]     xperm_b_sz;
     503              :    // logic        [7:0]     xperm_b_mask;
     504              :    // logic        [31:0]    xperm_b_pos;
     505              :    //
     506              :    //
     507              :    // assign xperm_b_sz[3:0]        =  4'd8;
     508              :    // assign xperm_b_mask[7:0]      =  8'hff;
     509              :    //
     510              :    // always_comb
     511              :    //   begin
     512              :    //     xperm_b_r[31:0] = 32'b0;
     513              :    //
     514              :    //     for (xperm_b_i=0; xperm_b_i<32; xperm_b_i = xperm_b_i + xperm_b_sz)     // This code did not work...
     515              :    //       begin
     516              :    //         xperm_b_pos[31:0] = ( (rs2_in[31:0] >> xperm_b_i) & {24'h0,xperm_b_mask[7:0]} ) << 3;
     517              :    //         if (xperm_b_pos[31:0] < 32'd32)
     518              :    //            xperm_b_r[31:0] = xperm_b_r[31:0] | ( ((rs1_in[31:0] >> xperm_b_pos[4:0]) & {24'h0,xperm_b_mask[7:0]}) << xperm_b_i );
     519              :    //       end
     520              :    //   end
     521              : 
     522        20236 :    logic        [31:0]    xperm_n;   // nibble-granular permute result
     523        59290 :    logic        [31:0]    xperm_b;   // byte-granular permute result
     524         7230 :    logic        [31:0]    xperm_h;   // halfword-granular permute result
     525              :    // xperm_n: result nibble i = rs1_in nibble selected by the low 3 bits of rs2_in nibble i; forced to zero when bit 3 of that rs2_in nibble is set (index out of range)
     526              :    assign xperm_n[03:00]         =  { 4{    ~rs2_in[03]     }} & 4'( (rs1_in[31:0] >> {rs2_in[02:00],2'b0}) &     4'hf );   // This is an 8:1 mux with qualified selects
     527              :    assign xperm_n[07:04]         =  { 4{    ~rs2_in[07]     }} & 4'( (rs1_in[31:0] >> {rs2_in[06:04],2'b0}) &     4'hf );
     528              :    assign xperm_n[11:08]         =  { 4{    ~rs2_in[11]     }} & 4'( (rs1_in[31:0] >> {rs2_in[10:08],2'b0}) &     4'hf );
     529              :    assign xperm_n[15:12]         =  { 4{    ~rs2_in[15]     }} & 4'( (rs1_in[31:0] >> {rs2_in[14:12],2'b0}) &     4'hf );
     530              :    assign xperm_n[19:16]         =  { 4{    ~rs2_in[19]     }} & 4'( (rs1_in[31:0] >> {rs2_in[18:16],2'b0}) &     4'hf );
     531              :    assign xperm_n[23:20]         =  { 4{    ~rs2_in[23]     }} & 4'( (rs1_in[31:0] >> {rs2_in[22:20],2'b0}) &     4'hf );
     532              :    assign xperm_n[27:24]         =  { 4{    ~rs2_in[27]     }} & 4'( (rs1_in[31:0] >> {rs2_in[26:24],2'b0}) &     4'hf );
     533              :    assign xperm_n[31:28]         =  { 4{    ~rs2_in[31]     }} & 4'( (rs1_in[31:0] >> {rs2_in[30:28],2'b0}) &     4'hf );
     534              :    // xperm_b: result byte i = rs1_in byte selected by the low 2 bits of rs2_in byte i; forced to zero when any of bits [7:2] of that rs2_in byte is set
     535              :    assign xperm_b[07:00]         =  { 8{ ~(| rs2_in[07:02]) }} & 8'( (rs1_in[31:0] >> {rs2_in[01:00],3'b0}) &    8'hff );   // This is a 4:1 mux with qualified selects
     536              :    assign xperm_b[15:08]         =  { 8{ ~(| rs2_in[15:10]) }} & 8'( (rs1_in[31:0] >> {rs2_in[09:08],3'b0}) &    8'hff );
     537              :    assign xperm_b[23:16]         =  { 8{ ~(| rs2_in[23:18]) }} & 8'( (rs1_in[31:0] >> {rs2_in[17:16],3'b0}) &    8'hff );
     538              :    assign xperm_b[31:24]         =  { 8{ ~(| rs2_in[31:26]) }} & 8'( (rs1_in[31:0] >> {rs2_in[25:24],3'b0}) &    8'hff );
     539              :    // xperm_h: result halfword i = rs1_in halfword selected by bit 0 of rs2_in halfword i; forced to zero when any of bits [15:1] of that rs2_in halfword is set
     540              :    assign xperm_h[15:00]         =  {16{ ~(| rs2_in[15:01]) }} & 16'( (rs1_in[31:0] >> {rs2_in[00]   ,4'b0}) & 16'hffff );   // This is a 2:1 mux with qualified selects
     541              :    assign xperm_h[31:16]         =  {16{ ~(| rs2_in[31:17]) }} & 16'( (rs1_in[31:0] >> {rs2_in[16]   ,4'b0}) & 16'hffff );
     542              : 
     543              : 
     544              : 
     545              : 
     546              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  CRC32, CRC32c  * * * * * * * * * * * * * * * * *
     547              : 
     548              :    // ***  computed from   https://crccalc.com  ***
     549              :    //
     550              :    // "a" is 8'h61 = 8'b0110_0001    (8'h61 ^ 8'hff = 8'h9e)
     551              :    //
     552              :    // Input must first be XORed with 32'hffff_ffff
     553              :    //
     554              :    //
     555              :    // CRC32
     556              :    //
     557              :    // Input    Output        Input      Output
     558              :    // -----   --------      --------   --------
     559              :    // "a"     e8b7be43      ffffff9e   174841bc
     560              :    // "aa"    078a19d7      ffff9e9e   f875e628
     561              :    // "aaaa"  ad98e545      9e9e9e9e   5267a1ba
     562              :    //
     563              :    //
     564              :    //
     565              :    // CRC32c
     566              :    //
     567              :    // Input    Output        Input      Output
     568              :    // -----   --------      --------   --------
     569              :    // "a"     c1d04330      ffffff9e   3e2fbccf
     570              :    // "aa"    f1f2dac2      ffff9e9e   0e0d253d
     571              :    // "aaaa"  6a52eeb0      9e9e9e9e   95ad114f
     572              : 
     573              : 
     574            0 :    logic                  crc32_all;   // OR of the six CRC select signals
     575          318 :    logic        [31:0]    crc32_poly_rev;
     576          318 :    logic        [31:0]    crc32c_poly_rev;
     577              :    integer                crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi;   // loop counters, one per CRC always_comb block
     578        47786 :    logic        [31:0]    crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd;   // rs1_in after 8 (b), 16 (h) or 32 (w) update steps, per polynomial
     579              : 
     580              :    // High when any CRC32 / CRC32C variant is selected; feeds bitmanip_sel_d
     581              :    assign crc32_all              =  ap_crc32_b  | ap_crc32_h  | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w;
     582              :    // Polynomial constants are held bit-reversed to match the shift-right update used by the CRC loops
     583              :    assign crc32_poly_rev[31:0]   =  32'hEDB88320;    // bit reverse of 32'h04C11DB7
     584              :    assign crc32c_poly_rev[31:0]  =  32'h82F63B78;    // bit reverse of 32'h1EDC6F41
     585              : 
     586              : 
     587          318 :    always_comb   // crc32_bd: rs1_in advanced by 8 update steps with crc32_poly_rev (selected by ap_crc32_b)
     588          318 :      begin
     589          318 :        crc32_bd[31:0]            =  rs1_in[31:0];
     590              :        // Each step: shift right by one; XOR in the polynomial when the bit shifted out (bit 0) was 1
     591          318 :        for (crc32_bi=0; crc32_bi<8; crc32_bi++)
     592         2544 :          begin
     593         2544 :             crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}});
     594              :          end      // FOR    crc32_bi
     595              :      end          // ALWAYS_COMB
     596              : 
     597              : 
     598          318 :    always_comb   // crc32_hd: rs1_in advanced by 16 update steps with crc32_poly_rev (selected by ap_crc32_h)
     599          318 :      begin
     600          318 :        crc32_hd[31:0]            =  rs1_in[31:0];
     601              :        // Each step: shift right by one; XOR in the polynomial when the bit shifted out (bit 0) was 1
     602          318 :        for (crc32_hi=0; crc32_hi<16; crc32_hi++)
     603         5088 :          begin
     604         5088 :             crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}});
     605              :          end      // FOR    crc32_hi
     606              :      end          // ALWAYS_COMB
     607              : 
     608              : 
     609          318 :    always_comb   // crc32_wd: rs1_in advanced by 32 update steps with crc32_poly_rev (selected by ap_crc32_w)
     610          318 :      begin
     611          318 :        crc32_wd[31:0]            =  rs1_in[31:0];
     612              :        // Each step: shift right by one; XOR in the polynomial when the bit shifted out (bit 0) was 1
     613          318 :        for (crc32_wi=0; crc32_wi<32; crc32_wi++)
     614        10176 :          begin
     615        10176 :             crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}});
     616              :          end      // FOR    crc32_wi
     617              :      end          // ALWAYS_COMB
     618              : 
     619              : 
     620              : 
     621              : 
     622          318 :    always_comb   // crc32c_bd: rs1_in advanced by 8 update steps with crc32c_poly_rev (selected by ap_crc32c_b)
     623          318 :      begin
     624          318 :        crc32c_bd[31:0]           =  rs1_in[31:0];
     625              :        // Each step: shift right by one; XOR in the polynomial when the bit shifted out (bit 0) was 1
     626          318 :        for (crc32c_bi=0; crc32c_bi<8; crc32c_bi++)
     627         2544 :          begin
     628         2544 :             crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}});
     629              :          end      // FOR    crc32c_bi
     630              :      end          // ALWAYS_COMB
     631              : 
     632              : 
     633          318 :    always_comb   // crc32c_hd: rs1_in advanced by 16 update steps with crc32c_poly_rev (selected by ap_crc32c_h)
     634          318 :      begin
     635          318 :        crc32c_hd[31:0]           =  rs1_in[31:0];
     636              :        // Each step: shift right by one; XOR in the polynomial when the bit shifted out (bit 0) was 1
     637          318 :        for (crc32c_hi=0; crc32c_hi<16; crc32c_hi++)
     638         5088 :          begin
     639         5088 :             crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}});
     640              :          end      // FOR    crc32c_hi
     641              :      end          // ALWAYS_COMB
     642              : 
     643              : 
     644          318 :    always_comb   // crc32c_wd: rs1_in advanced by 32 update steps with crc32c_poly_rev (selected by ap_crc32c_w)
     645          318 :      begin
     646          318 :        crc32c_wd[31:0]           =  rs1_in[31:0];
     647              :        // Each step: shift right by one; XOR in the polynomial when the bit shifted out (bit 0) was 1
     648          318 :        for (crc32c_wi=0; crc32c_wi<32; crc32c_wi++)
     649        10176 :          begin
     650        10176 :             crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}});
     651              :          end      // FOR    crc32c_wi
     652              :      end          // ALWAYS_COMB
     653              : 
     654              : 
     655              : 
     656              : 
     657              : 
     658              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  BFP          * * * * * * * * * * * * * * * * * *
     659              : 
     660              : 
     661              :    // uint_xlen_t bfp(uint_xlen_t rs1, uint_xlen_t rs2)
     662              :    // {
     663              :    //    uint_xlen_t cfg = rs2 >> (XLEN/2);
     664              :    //    if ((cfg >> 30) == 2) cfg = cfg >> 16;
     665              :    //    int len          = (cfg >> 8) & (XLEN/2-1);
     666              :    //    int off          = cfg & (XLEN-1);
     667              :    //    len              = len ? len : XLEN/2;
     668              :    //    uint_xlen_t mask = slo(0, len) << off;
     669              :    //    uint_xlen_t data = rs2 << off;
     670              :    //    return (data & mask) | (rs1 & ~mask);
     671              :    // }
     672              : 
     673        27341 :    logic        [4:0]     bfp_len;             // field length, 1..16
     674        27751 :    logic        [4:0]     bfp_off;             // field offset (shift amount)
     675          276 :    logic        [31:0]    bfp_len_mask_;       // 0 in the low bfp_len bits, 1 above
     676          318 :    logic        [31:0]    bfp_off_mask_;       // 0 in the low bfp_off bits, 1 above
     677        25762 :    logic        [15:0]    bfp_preshift_data;   // rs2_in[15:0] limited to its low bfp_len bits
     678         4976 :    logic        [31:0]    bfp_shift_data;      // field data moved to its destination position
     679        10408 :    logic        [31:0]    bfp_shift_mask;      // 1 where rs1_in is kept, 0 inside the inserted field
     680        19594 :    logic        [31:0]    bfp_result_d;
     681              : 
     682              :    // Decode the length and offset fields from the upper half of rs2_in
     683              :    assign bfp_len[3:0]           =  rs2_in[27:24];
     684              :    assign bfp_len[4]             = (bfp_len[3:0] == 4'b0);   // If LEN field is zero, then LEN=16
     685              :    assign bfp_off[4:0]           =  rs2_in[20:16];
     686              :    // Build the masks by shifting all-ones left, which leaves zeros in the low bits
     687              :    assign bfp_len_mask_[31:0]    =  32'hffff_ffff  <<  bfp_len[4:0];
     688              :    assign bfp_off_mask_[31:0]    =  32'hffff_ffff  <<  bfp_off[4:0];
     689              :    assign bfp_preshift_data[15:0]=  rs2_in[15:0] & ~bfp_len_mask_[15:0];
     690              :    // Position the data; the mask is 0 only in bits [bfp_off+bfp_len-1 : bfp_off] (bits shifted past bit 31 are dropped)
     691              :    assign bfp_shift_data[31:0]   = {16'b0,bfp_preshift_data[15:0]}  <<  bfp_off[4:0];
     692              :    assign bfp_shift_mask[31:0]   = (bfp_len_mask_[31:0]             <<  bfp_off[4:0]) | ~bfp_off_mask_[31:0];
     693              :    // Merge: inserted field from rs2_in, every other bit from rs1_in
     694              :    assign bfp_result_d[31:0]     = bfp_shift_data[31:0] | (rs1_in[31:0] & bfp_shift_mask[31:0]);
     695              : 
     696              : 
     697              : 
     698              : 
     699              :    // * * * * * * * * * * * * * * * * * *  BitManip  :  Common logic * * * * * * * * * * * * * * * * * *
     700              : 
     701              : 
     702              :    assign bitmanip_sel_d         =  ap_bcompress | ap_bdecompress | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp | ap_xperm_n | ap_xperm_b | ap_xperm_h;   // high when any bitmanip operation handled here is selected
     703              :    // AND-OR mux: each result is gated by its own select, so bitmanip_d is all zero when no select is high. NOTE(review): presumably at most one select is high at a time - confirm against the decoder
     704              :    assign bitmanip_d[31:0]       = ( {32{ap_bcompress}}    &       bcompress_d[31:0]   ) |
     705              :                                    ( {32{ap_bdecompress}}  &       bdecompress_d[31:0] ) |
     706              :                                    ( {32{ap_clmul}}        &       clmul_raw_d[31:0]   ) |
     707              :                                    ( {32{ap_clmulh}}       & {1'b0,clmul_raw_d[62:32]} ) |
     708              :                                    ( {32{ap_clmulr}}       &       clmul_raw_d[62:31]  ) |
     709              :                                    ( {32{ap_grev}}         &       grev_d[31:0]        ) |
     710              :                                    ( {32{ap_gorc}}         &       gorc_d[31:0]        ) |
     711              :                                    ( {32{ap_shfl}}         &       shfl_d[31:0]        ) |
     712              :                                    ( {32{ap_unshfl}}       &       unshfl_d[31:0]      ) |
     713              :                                    ( {32{ap_crc32_b}}      &       crc32_bd[31:0]      ) |
     714              :                                    ( {32{ap_crc32_h}}      &       crc32_hd[31:0]      ) |
     715              :                                    ( {32{ap_crc32_w}}      &       crc32_wd[31:0]      ) |
     716              :                                    ( {32{ap_crc32c_b}}     &       crc32c_bd[31:0]     ) |
     717              :                                    ( {32{ap_crc32c_h}}     &       crc32c_hd[31:0]     ) |
     718              :                                    ( {32{ap_crc32c_w}}     &       crc32c_wd[31:0]     ) |
     719              :                                    ( {32{ap_bfp}}          &       bfp_result_d[31:0]  ) |
     720              :                                    ( {32{ap_xperm_n}}      &       xperm_n[31:0]       ) |
     721              :                                    ( {32{ap_xperm_b}}      &       xperm_b[31:0]       ) |
     722              :                                    ( {32{ap_xperm_h}}      &       xperm_h[31:0]       );
     723              : 
     724              : 
     725              :    // 33-bit rvdffe instance (defined elsewhere; presumably an enabled flop - confirm) carrying the select and data together from the _d to the _x signals, with enable bit_x_enable
     726              :    rvdffe #(33) i_bitmanip_ff    (.*, .clk(clk),  .din({bitmanip_sel_d,bitmanip_d[31:0]}),   .dout({bitmanip_sel_x,bitmanip_x[31:0]}),   .en(bit_x_enable));
     727              : 
     728              : 
     729              : 
     730              :    // Final result: prod_x upper or lower word (chosen by low_x) when bitmanip_sel_x is 0; bitmanip_x is ORed in without a select qualifier
     731              :    assign result_x[31:0]         =  ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32]    ) |
     732              :                                     ( {32{~bitmanip_sel_x &  low_x}} & prod_x[31:0]     ) |
     733              :                                                                        bitmanip_x[31:0];
     734              : 
     735              : 
     736              : 
     737              : endmodule  // el2_exu_mul_ctl