Skip to content

AVX512 modifications for LDPC encoding: interleaving, ZC384 BG1, output...

knopp requested to merge ldpc_enc_avx512 into develop

AVX512 modifications for LDPC encoding: interleaving, ZC384 BG1, output formatting for encoder, segmentation (memcpy instead of loop)

This improves the overall performance of the NR DL transmitter in the gNB. Here is a summary of timings on some machines at EURECOM (note: rk_integration1224 is not an MR; it is this MR combined with MR !3127). Machines: matix = 5.9 GHz Ryzen Gen4, peafowl = 4.1 GHz EPYC 9374F, stupix = 3.6 GHz Xeon Gold 6354, broadbill = 3.0 GHz EPYC 8534P.

sudo ./nr_dlsim -n100 -P -x2 -y4 -z4 -R273 -b273 -e 25 -s30

ldpc_enc_avx512 (matix) 359.20 us
develop (matix) 394.56 us
ldpc_enc_avx512 (peafowl) 523.93 us
develop (peafowl) 554.98 us
ldpc_enc_avx512 (falcon-gh200) 699.99 us
develop (falcon-gh200) 809.76 us
 
rk_integration1224 (peafowl) 476.95 us
rk_integration1224 (matix) 331.61 us
rk_integration1224 (stupix) 707.64 us
rk_integration1224 (stupix, --noavx512) 638 us
rk_integration1224 (broadbill) 760us
rk_integration1224 (broadbill, --noavx512) 844.66 us
rk_integration1224 (falcon-gh200) 647.58 us

sudo ./nr_dlsim -n100 -P -x2 -y4 -z4 -R273 -b273 -e 25 -s30 -X 8,9,10,11,12

ldpc_enc_avx512 (peafowl) 396.68 us
develop (peafowl) 407.75 us
ldpc_enc_avx512 (matix) 378.11 us (-1,-1,-1,-1,-1)
develop (matix) 383.98 us (-1,-1,-1,-1,-1)
ldpc_enc_avx512 (falcon-gh200) 441.99 us (4,5,6,7,8)
develop (falcon-gh200) 494.40 us (4,5,6,7,8)

rk_integration1224 (peafowl) 346.74 us
rk_integration1224 (matix) 348.25 us
rk_integration1224 (stupix) 502 us
rk_integration1224 (stupix, --noavx512) 486 us
rk_integration1224 (broadbill) 558us
rk_integration1224 (broadbill, --noavx512) 594.16 us
rk_integration1224 (falcon-gh200) 392.65 us (4,5,6,7,8)

sudo ./nr_dlsim -n100 -P -x2 -y4 -z4 -R273 -b273 -e 25 -s30 -q1

ldpc_enc_avx512 (matix) 492.69 us
develop (matix) 533.98 us
ldpc_enc_avx512 (peafowl) 686.90 us
develop (peafowl) 735.42 us
ldpc_enc_avx512 (falcon-gh200) 809.67 us
develop (falcon-gh200) 1089 us
rk_integration1224 (peafowl) 634.33 us
rk_integration1224 (matix) 447.87 us
rk_integration1224 (stupix) 923.16 us
rk_integration1224 (stupix, --noavx512) 771us
rk_integration1224 (broadbill) 988us
rk_integration1224 (broadbill, --noavx512) 970us
rk_integration1224 (falcon-gh200) 750.50 us

sudo ./nr_dlsim -n100 -P -x2 -y4 -z4 -R273 -b273 -e 25 -s30 -q1 -X 8,9,10,11,12

ldpc_enc_avx512 (matix) 506.55 us (-1,-1,-1,-1,-1)
develop (matix) 481.90 us (-1,-1,-1,-1,-1)
ldpc_enc_avx512 (peafowl) 456.70 us
develop (peafowl) 466.49 us
ldpc_enc_avx512 (falcon-gh200) 498.82 us (4,5,6,7,8)
develop (falcon-gh200) 573.81 us (4,5,6,7,8)
rk_integration1224 (peafowl) 407.07 us
rk_integration1224 (matix) 474.84 us
rk_integration1224 (stupix) 600 us
rk_integration1224 (stupix, --noavx512) 570us
rk_integration1224 (broadbill) 598us
rk_integration1224 (broadbill, --noavx512) 613.77us
rk_integration1224 (falcon-gh200) 435.74 us (4,5,6,7,8)
Edited by knopp

Merge request reports

Loading