diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c index ffcad00af4d2b8eddd7361b92fc6674af3d97e14..3379742b5be8a0c3019dfc58a29f3473704d7550 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c @@ -1,4 +1,5 @@ + /* * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more * contributor license agreements. See the NOTICE file distributed with @@ -328,17 +329,17 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP { case 13: { - nrLDPC_bnProcPc_BG1_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 23: { - nrLDPC_bnProcPc_BG1_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 89: { - nrLDPC_bnProcPc_BG1_R89_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R89_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } } @@ -349,20 +350,20 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP { case 15: { - nrLDPC_bnProcPc_BG2_R15_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R15_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 13: { - nrLDPC_bnProcPc_BG2_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 23: { - nrLDPC_bnProcPc_BG2_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } @@ -614,17 +615,17 @@ if (BG==1) { case 13: { - nrLDPC_bnProcPc_BG1_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 23: { - nrLDPC_bnProcPc_BG1_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 89: { - nrLDPC_bnProcPc_BG1_R89_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R89_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } } @@ -635,20 +636,20 @@ if (BG==1) { case 15: { - nrLDPC_bnProcPc_BG2_R15_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R15_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 13: { - nrLDPC_bnProcPc_BG2_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 23: { - nrLDPC_bnProcPc_BG2_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } @@ -723,7 +724,7 @@ if (BG==1) #ifdef __AVX512BW__ nrLDPC_bnProc_BG2_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); #else - nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,+p_procBuf->llrRes, Z); #endif break; } @@ -906,7 +907,7 @@ if (BG==1) #ifdef NR_LDPC_PROFILER_DETAIL start_meas(&p_profiler->bnProcPc); #endif - //nrLDPC_bnProcPc(p_lut, p_procBuf, Z); + // nrLDPC_bnProcPc(p_lut, p_procBuf, Z); if (BG==1) { @@ -914,17 +915,17 @@ if (BG==1) { case 13: { - nrLDPC_bnProcPc_BG1_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 23: { - nrLDPC_bnProcPc_BG1_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 89: { - nrLDPC_bnProcPc_BG1_R89_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG1_R89_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } } @@ -935,20 +936,20 @@ if (BG==1) { case 15: { - nrLDPC_bnProcPc_BG2_R15_AVX2(p_procBuf->bnProcBuf, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R15_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes, p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 13: { - nrLDPC_bnProcPc_BG2_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R13_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } case 23: { - nrLDPC_bnProcPc_BG2_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); + nrLDPC_bnProcPc_BG2_R23_AVX2(p_procBuf->bnProcBuf,p_procBuf->bnProcBufRes,p_procBuf->llrRes, p_procBuf->llrProcBuf, Z); break; } @@ -1136,3 +1137,4 @@ if (BG==1) + diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c index fd015580495e5c1832ff666653ad45e189d5b88a..3207a7a308266024a2e8ab6452ad529491004618 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c @@ -1,6 +1,7 @@ + #include <stdint.h> #include <immintrin.h> #include "../../nrLDPCdecoder_defs.h" @@ -24,7 +25,7 @@ void nrLDPC_bnProcPc_BG1_generator_AVX2(int R) // fprintf(fd,"#include <stdint.h>\n"); // fprintf(fd,"#include <immintrin.h>\n"); - fprintf(fd,"static inline void nrLDPC_bnProcPc_BG1_R%s_AVX2(int8_t* bnProcBuf,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) {\n",ratestr[R]); + fprintf(fd,"static inline void nrLDPC_bnProcPc_BG1_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) {\n",ratestr[R]); const uint8_t* lut_numBnInBnGroups; const uint32_t* lut_startAddrBnGroups; const uint16_t* lut_startAddrBnGroupsLlr; @@ -64,16 +65,18 @@ void nrLDPC_bnProcPc_BG1_generator_AVX2(int R) fprintf(fd," __m128i* p_bnProcBuf; \n"); fprintf(fd," __m128i* p_llrProcBuf;\n"); fprintf(fd," __m256i* p_llrRes; \n"); + // fprintf(fd," __m256i* p_bnProcBufRes; \n"); +// fprintf(fd," __m256i* p_llrProcBuf256; \n"); fprintf(fd," uint32_t M ;\n"); -fprintf(fd, "// Process group with 1 CNs \n"); - + fprintf(fd, "// Process group with 1 CNs \n"); - // Process group with 2 CNs /* - if (lut_numBnInBnGroups[0] > 0) - { + // Process group with 1 CNs + + // if (lut_numBnInBnGroups[0] > 0) + // { // If elements in group move to next address // idxBnGroup++; @@ -81,49 +84,44 @@ fprintf(fd, "// Process group with 1 CNs \n"); fprintf(fd," M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[0] ); // Set the offset to each CN within a group in terms of 16 Byte - cnOffsetInGroup = (lut_numBnInBnGroups[0]*NR_LDPC_ZMAX)>>4; + // cnOffsetInGroup = (lut_numBnInBnGroups[0]*NR_LDPC_ZMAX)>>4; // Set pointers to start of group 2 fprintf(fd," p_bnProcBuf = (__m128i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + // fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); fprintf(fd," p_llrProcBuf = (__m128i*) &llrProcBuf [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // fprintf(fd," p_llrProcBuf256 = (__m256i*) &llrProcBuf [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); // Loop over BNs fprintf(fd," for (int i=0,j=0;i<M;i++,j+=2) {\n"); + + fprintf(fd," p_bnProcBufRes[i] = p_llrProcBuf256[i];\n"); + // First 16 LLRs of first CN fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n"); - fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); - - // Loop over CNs - for (k=1; k<1; k++) - { - fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup); - fprintf(fd," ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n"); - - fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup); + fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n"); + fprintf(fd," ymmRes0 = _mm256_adds_epi16(ymm0, ymm1);\n"); - fprintf(fd, " ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n"); - } - - // Add LLR from receiver input - fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n"); - fprintf(fd," ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n"); - - fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n"); - fprintf(fd," ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n"); + + // Second 16 LLRs of first CN + fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1 ]);\n"); + fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j + 1 ]);\n"); + fprintf(fd," ymmRes1 = _mm256_adds_epi16(ymm0, ymm1);\n"); // Pack results back to epi8 fprintf(fd," ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); - } - // ===================================================================== + //} +*/ + // ===================================================================== // Process group with 2 CNs -*/ + fprintf(fd, "// Process group with 2 CNs \n"); @@ -148,8 +146,8 @@ fprintf(fd, "// Process group with 2 CNs \n"); // Loop over BNs fprintf(fd," for (int i=0,j=0;i<M;i++,j+=2) {\n"); // First 16 LLRs of first CN - fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n"); - fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); + fprintf(fd," ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n"); + fprintf(fd," ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); // Loop over CNs for (k=1; k<2; k++) @@ -174,7 +172,7 @@ fprintf(fd, "// Process group with 2 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -230,7 +228,7 @@ fprintf(fd, "// Process group with 3 CNs \n"); fprintf(fd," ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -288,7 +286,7 @@ fprintf(fd, "// Process group with 4 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -344,7 +342,7 @@ fprintf(fd, "// Process group with 5 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -401,7 +399,7 @@ fprintf(fd, "// Process group with 6 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -458,7 +456,7 @@ fprintf(fd, "// Process group with 7 CNs \n"); // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] //fprintf(fd," (__m256i*) &llrRes[%d + i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 ); fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -516,7 +514,7 @@ fprintf(fd, "// Process group with 8 CNs \n"); //fprintf(fd," (__m256i*) &llrRes[%d + i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 ); fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -572,7 +570,7 @@ fprintf(fd, "// Process group with 9 CNs \n"); // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] //fprintf(fd," (__m256i*) &llrRes[%d + i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n",lut_startAddrBnGroupsLlr[idxBnGroup]>>5 ); fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -628,7 +626,7 @@ fprintf(fd, "// Process group with 10 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -686,7 +684,7 @@ fprintf(fd, "// Process group with 11 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } // ===================================================================== @@ -741,7 +739,7 @@ fprintf(fd, "// Process group with 12 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -797,7 +795,7 @@ fprintf(fd, "// Process group with 13 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -854,7 +852,7 @@ fprintf(fd, "// Process group with 14 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -910,7 +908,7 @@ fprintf(fd, "// Process group with 15 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -967,7 +965,7 @@ fprintf(fd, "// Process group with 16 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1023,7 +1021,7 @@ fprintf(fd, "// Process group with 17 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1079,7 +1077,7 @@ fprintf(fd, "// Process group with 18 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1134,7 +1132,7 @@ fprintf(fd, "// Process group with 19 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1190,7 +1188,7 @@ fprintf(fd, "// Process group with 20 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1250,7 +1248,7 @@ fprintf(fd, "// Process group with 21 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } // ===================================================================== @@ -1305,7 +1303,7 @@ fprintf(fd, "// Process group with 22 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1347,7 +1345,7 @@ fprintf(fd, "// Process group with <23 CNs \n"); fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup); fprintf(fd," ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n"); - } + } // Add LLR from receiver input fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n"); @@ -1361,7 +1359,7 @@ fprintf(fd, "// Process group with <23 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1418,7 +1416,7 @@ fprintf(fd, "// Process group with 24 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1474,7 +1472,7 @@ fprintf(fd, "// Process group with 25 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1531,7 +1529,7 @@ fprintf(fd, "// Process group with 26 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1587,7 +1585,7 @@ fprintf(fd, "// Process group with 27 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1643,7 +1641,7 @@ fprintf(fd, "// Process group with 28 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1698,7 +1696,7 @@ fprintf(fd, "// Process group with 29 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1754,7 +1752,7 @@ fprintf(fd, "// Process group with 30 CNs \n"); // ymm0 = [ymmRes1[255:128] ymmRes0[255:128] ymmRes1[127:0] ymmRes0[127:0]] // p_llrRes = [ymmRes1[255:128] ymmRes1[127:0] ymmRes0[255:128] ymmRes0[127:0]] fprintf(fd," p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);\n"); - + fprintf(fd,"}\n"); } @@ -1767,3 +1765,4 @@ fprintf(fd, "// Process group with 30 CNs \n"); + diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.o deleted file mode 100644 index 8caf4b85a31f5704239e0d6aa2071be5529f93a2..0000000000000000000000000000000000000000 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.o and /dev/null differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.c index a6662ee3314c4d3ecc9bdf4de71bb65a420c1207..87260e18fa2682aa88f6a692937902de9740d4f5 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.c @@ -1,7 +1,3 @@ - - - - #include <stdint.h> #include <immintrin.h> #include "../../nrLDPCdecoder_defs.h" @@ -25,7 +21,7 @@ void nrLDPC_bnProcPc_BG2_generator_AVX2(int R) // fprintf(fd,"#include <stdint.h>\n"); //fprintf(fd,"#include <immintrin.h>\n"); - fprintf(fd,"static inline void nrLDPC_bnProcPc_BG2_R%s_AVX2(int8_t* bnProcBuf,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) {\n",ratestr[R]); + fprintf(fd,"static inline void nrLDPC_bnProcPc_BG2_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) {\n",ratestr[R]); const uint8_t* lut_numBnInBnGroups; const uint32_t* lut_startAddrBnGroups; const uint16_t* lut_startAddrBnGroupsLlr; @@ -51,10 +47,9 @@ void nrLDPC_bnProcPc_BG2_generator_AVX2(int R) } else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();} - // Number of BNs in Groups -// uint32_t M; - //uint32_t M32rem; - //uint32_t i,j; + + + uint32_t k; // Offset to each bit within a group in terms of 32 Byte uint32_t cnOffsetInGroup; @@ -66,16 +61,18 @@ void nrLDPC_bnProcPc_BG2_generator_AVX2(int R) fprintf(fd," __m128i* p_bnProcBuf; \n"); fprintf(fd," __m128i* p_llrProcBuf;\n"); fprintf(fd," __m256i* p_llrRes; \n"); + // fprintf(fd," __m256i* p_bnProcBufRes; \n"); + // fprintf(fd," __m256i* p_llrProcBuf256; \n"); fprintf(fd," uint32_t M ;\n"); -fprintf(fd, "// Process group with 1 CNs \n"); - + fprintf(fd, "// Process group with 1 CNs \n"); +/* - // Process group with 2 CNs + // Process group with 1 CNs - if (lut_numBnInBnGroups[0] > 0) - { + // if (lut_numBnInBnGroups[0] > 0) + // { // If elements in group move to next address // idxBnGroup++; @@ -83,36 +80,30 @@ fprintf(fd, "// Process group with 1 CNs \n"); fprintf(fd," M = (%d*Z + 31)>>5;\n",lut_numBnInBnGroups[0] ); // Set the offset to each CN within a group in terms of 16 Byte - cnOffsetInGroup = (lut_numBnInBnGroups[0]*NR_LDPC_ZMAX)>>4; + // cnOffsetInGroup = (lut_numBnInBnGroups[0]*NR_LDPC_ZMAX)>>4; // Set pointers to start of group 2 fprintf(fd," p_bnProcBuf = (__m128i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); fprintf(fd," p_llrProcBuf = (__m128i*) &llrProcBuf [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + fprintf(fd," p_llrProcBuf256 = (__m256i*) &llrProcBuf [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); // Loop over BNs fprintf(fd," for (int i=0,j=0;i<M;i++,j+=2) {\n"); + + fprintf(fd," p_bnProcBufRes[i] = p_llrProcBuf256[i];\n"); + // First 16 LLRs of first CN fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n"); fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n"); + fprintf(fd," ymmRes0 = _mm256_adds_epi16(ymm0, ymm1);\n"); - // Loop over CNs - /*for (k=1; k<1; k++) - { - fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup); - fprintf(fd," ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n"); - - fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup); - - fprintf(fd, " ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n"); - } -*/ - // Add LLR from receiver input - fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]);\n"); - fprintf(fd," ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);\n"); - fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n"); - fprintf(fd," ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);\n"); + // Second 16 LLRs of first CN + fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1 ]);\n"); + fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j + 1 ]);\n"); + fprintf(fd," ymmRes1 = _mm256_adds_epi16(ymm0, ymm1);\n"); // Pack results back to epi8 fprintf(fd," ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);\n"); @@ -122,8 +113,8 @@ fprintf(fd, "// Process group with 1 CNs \n"); fprintf(fd,"}\n"); - } - // ===================================================================== + //} + */ // ===================================================================== // Process group with 2 CNs @@ -150,8 +141,8 @@ fprintf(fd, "// Process group with 2 CNs \n"); // Loop over BNs fprintf(fd," for (int i=0,j=0;i<M;i++,j+=2) {\n"); // First 16 LLRs of first CN - fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n"); - fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); + fprintf(fd," ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);\n"); + fprintf(fd," ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); // Loop over CNs for (k=1; k<2; k++) @@ -1349,7 +1340,7 @@ fprintf(fd, "// Process group with <23 CNs \n"); fprintf(fd," ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup); fprintf(fd," ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); \n"); - } + } // Add LLR from receiver input fprintf(fd," ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);\n"); @@ -1771,4 +1762,3 @@ fprintf(fd, "// Process group with 30 CNs \n"); - diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.o deleted file mode 100644 index 2f6a08ea358ebdad5d329ffe676e4f502c3abf58..0000000000000000000000000000000000000000 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.o and /dev/null differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.c index 96dc885766c85e9d267baca176d2c82f256bf70a..00ab6abb6f3d6e0e17fab635a80f5fccdfb11884 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.c @@ -1,12 +1,8 @@ - #include <stdint.h> #include <immintrin.h> #include "../../nrLDPCdecoder_defs.h" #include "../../nrLDPC_types.h" -//#include "../../nrLDPC_bnProc.h" -//#include "../../nrLDPC_cnProc.h" -//#include "../../nrLDPC_init.h" void nrLDPC_bnProc_BG1_generator_AVX2(int R) @@ -62,14 +58,7 @@ void nrLDPC_bnProc_BG1_generator_AVX2(int R) // Offset to each bit within a group in terms of 32 Byte uint32_t cnOffsetInGroup; uint8_t idxBnGroup = 0; - - - - fprintf(fd," __m256i* p_bnProcBuf; \n"); - fprintf(fd," __m256i* p_bnProcBufRes; \n"); - fprintf(fd," __m256i* p_llrRes; \n"); - fprintf(fd," __m256i* p_res; \n"); - fprintf(fd," uint32_t M, i; \n"); + fprintf(fd," uint32_t M, i; \n"); @@ -79,9 +68,6 @@ void nrLDPC_bnProc_BG1_generator_AVX2(int R) // ===================================================================== - - // ===================================================================== - fprintf(fd, "// Process group with 2 CNs \n"); if (lut_numBnInBnGroups[1] > 0) @@ -97,18 +83,14 @@ fprintf(fd, "// Process group with 2 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<2; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); - + // Loop over BNs - fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -137,17 +119,16 @@ fprintf(fd, "// Process group with 3 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + //fprintf(fd," ((__m256i*) bnProcBuf) = ((__m256i*) &bnProcBuf) [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + for (k=0; k<3; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); } @@ -174,17 +155,15 @@ fprintf(fd, "// Process group with 4 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + for (k=0; k<4; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); - + // Loop over BNs - fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",((lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup),(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), ((lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup)); fprintf(fd,"}\n"); } @@ -210,18 +189,15 @@ fprintf(fd, "// Process group with 4 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<5; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -249,18 +225,15 @@ fprintf(fd, "// Process group with 6 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<6; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -287,18 +260,15 @@ fprintf(fd, "// Process group with 7 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<7; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -325,18 +295,15 @@ fprintf(fd, "// Process group with 8 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<8; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -362,18 +329,15 @@ fprintf(fd, "// Process group with 9 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<9; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -399,18 +363,15 @@ fprintf(fd, "// Process group with 10 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<10; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -435,18 +396,15 @@ fprintf(fd, "// Process group with 11 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<11; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -471,18 +429,15 @@ fprintf(fd, "// Process group with 12 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<12; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -509,18 +464,15 @@ fprintf(fd, "// Process group with 13 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<13; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -548,18 +500,15 @@ fprintf(fd, "// Process group with 14 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<14; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -586,18 +535,15 @@ fprintf(fd, "// Process group with 15 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<15; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -625,18 +571,15 @@ fprintf(fd, "// Process group with 16 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<16; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -663,18 +606,15 @@ fprintf(fd, "// Process group with 17 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<17; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -701,18 +641,15 @@ fprintf(fd, "// Process group with 18 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<18; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -738,18 +675,15 @@ fprintf(fd, "// Process group with 19 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<19; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -776,18 +710,15 @@ fprintf(fd, "// Process group with 20 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<20; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -818,18 +749,15 @@ fprintf(fd, "// Process group with 21 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<21; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -854,18 +782,15 @@ fprintf(fd, "// Process group with 22 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<22; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -892,18 +817,15 @@ fprintf(fd, "// Process group with <23 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<23; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -931,18 +853,15 @@ fprintf(fd, "// Process group with 24 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<24; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -969,18 +888,15 @@ fprintf(fd, "// Process group with 25 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<25; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1009,18 +925,15 @@ fprintf(fd, "// Process group with 26 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<26; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1047,18 +960,15 @@ fprintf(fd, "// Process group with 27 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<27; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1085,18 +995,15 @@ fprintf(fd, "// Process group with 28 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<28; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1121,18 +1028,15 @@ fprintf(fd, "// Process group with 29 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<29; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1158,18 +1062,15 @@ fprintf(fd, "// Process group with 30 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<30; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.o deleted file mode 100644 index d40fd0a3af91fdbdb82afdac3ac2f5500eec1dc6..0000000000000000000000000000000000000000 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.o and /dev/null differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.c index 617dc54520d580713b13fefa741f4abfd3988dcd..aef582ad6f07aaf04e187c8d742e1c164c221d58 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.c @@ -4,9 +4,7 @@ #include <immintrin.h> #include "../../nrLDPCdecoder_defs.h" #include "../../nrLDPC_types.h" -//#include "../../nrLDPC_bnProc.h" -//#include "../../nrLDPC_cnProc.h" -//#include "../../nrLDPC_init.h" + void nrLDPC_bnProc_BG2_generator_AVX2(int R) @@ -23,8 +21,7 @@ void nrLDPC_bnProc_BG2_generator_AVX2(int R) FILE *fd=fopen(fname,"w"); if (fd == NULL) {printf("Cannot create \n");abort();} - fprintf(fd,"#include <stdint.h>\n"); - fprintf(fd,"#include <immintrin.h>\n"); + fprintf(fd,"void nrLDPC_bnProc_BG2_R%s_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {\n",ratestr[R]); const uint8_t* lut_numBnInBnGroups; @@ -77,9 +74,6 @@ void nrLDPC_bnProc_BG2_generator_AVX2(int R) // ===================================================================== - - // ===================================================================== - fprintf(fd, "// Process group with 2 CNs \n"); if (lut_numBnInBnGroups[1] > 0) @@ -95,18 +89,14 @@ fprintf(fd, "// Process group with 2 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<2; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); - + // Loop over BNs - fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -135,17 +125,16 @@ fprintf(fd, "// Process group with 3 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + //fprintf(fd," ((__m256i*) bnProcBuf) = ((__m256i*) &bnProcBuf) [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + for (k=0; k<3; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); } @@ -172,17 +161,15 @@ fprintf(fd, "// Process group with 4 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + for (k=0; k<4; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); - + // Loop over BNs - fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",((lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup),(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), ((lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup)); fprintf(fd,"}\n"); } @@ -208,18 +195,15 @@ fprintf(fd, "// Process group with 4 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<5; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -247,18 +231,15 @@ fprintf(fd, "// Process group with 6 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<6; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -285,18 +266,15 @@ fprintf(fd, "// Process group with 7 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<7; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -323,18 +301,15 @@ fprintf(fd, "// Process group with 8 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<8; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -360,18 +335,15 @@ fprintf(fd, "// Process group with 9 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<9; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -397,18 +369,15 @@ fprintf(fd, "// Process group with 10 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<10; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -433,18 +402,15 @@ fprintf(fd, "// Process group with 11 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<11; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -469,18 +435,15 @@ fprintf(fd, "// Process group with 12 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<12; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -507,18 +470,15 @@ fprintf(fd, "// Process group with 13 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<13; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -546,18 +506,15 @@ fprintf(fd, "// Process group with 14 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<14; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -584,18 +541,15 @@ fprintf(fd, "// Process group with 15 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<15; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -623,18 +577,15 @@ fprintf(fd, "// Process group with 16 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<16; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -661,18 +612,15 @@ fprintf(fd, "// Process group with 17 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<17; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -699,18 +647,15 @@ fprintf(fd, "// Process group with 18 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<18; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -736,25 +681,21 @@ fprintf(fd, "// Process group with 19 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<19; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); } } - // ===================================================================== @@ -774,18 +715,15 @@ fprintf(fd, "// Process group with 20 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<20; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -816,18 +754,15 @@ fprintf(fd, "// Process group with 21 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<21; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -852,18 +787,15 @@ fprintf(fd, "// Process group with 22 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<22; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -890,18 +822,15 @@ fprintf(fd, "// Process group with <23 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<23; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -929,18 +858,15 @@ fprintf(fd, "// Process group with 24 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<24; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -967,18 +893,15 @@ fprintf(fd, "// Process group with 25 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<25; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1007,18 +930,15 @@ fprintf(fd, "// Process group with 26 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<26; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1045,18 +965,15 @@ fprintf(fd, "// Process group with 27 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<27; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1083,18 +1000,15 @@ fprintf(fd, "// Process group with 28 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<28; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1119,18 +1033,15 @@ fprintf(fd, "// Process group with 29 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<29; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1156,18 +1067,15 @@ fprintf(fd, "// Process group with 30 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>5; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m256i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m256i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<30; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m256i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m256i*)bnProcBufRes)[%d + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[%d + i ], ((__m256i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>5), (lut_startAddrBnGroups[idxBnGroup]>>5)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1176,11 +1084,6 @@ fprintf(fd, "// Process group with 30 CNs \n"); fprintf(fd,"}\n"); fclose(fd); -}//end of the function nrLDPC_bnProc_BG1 - - - - - +}//end of the function nrLDPC_bnProc_BG2 diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.o deleted file mode 100644 index 248c7244cf105f023b1896cd4308aee4e0e8ce83..0000000000000000000000000000000000000000 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.o and /dev/null differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_avx2 b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_avx2 index 8e7d6324bd6cb8fa9fbb81d57e9d9c56413530da..7133f3f4c38b305fb9af3c5475604573f4ae8c37 100755 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_avx2 and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_avx2 differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/main.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/main.o deleted file mode 100644 index c9ea8704ae2d57a0c36127605f696ab9b1febb99..0000000000000000000000000000000000000000 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/main.o and /dev/null differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz deleted file mode 100644 index 438d0b4b2111a33261ab02399920965505229458..0000000000000000000000000000000000000000 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz and /dev/null differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c index 09bd99521639960ad989eb46a832cf33ee5c96af..fabeda7551f5ee547c6cd257370a17146712c7d0 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c @@ -1,7 +1,3 @@ - - - - #include <stdint.h> #include <immintrin.h> #include "../../nrLDPCdecoder_defs.h" @@ -148,8 +144,8 @@ fprintf(fd, "// Process group with 2 CNs \n"); // Loop over BNs fprintf(fd," for (int i=0,j=0;i<M;i++,j+=2) {\n"); // First 16 LLRs of first CN - fprintf(fd," zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n"); - fprintf(fd," zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); + fprintf(fd," zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n"); + fprintf(fd," zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); // Loop over CNs for (k=1; k<2; k++) diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c index a025ae8efb3b1b653720ae741c66e2d8f1ce38a6..e5858e6bfdc1edcdb56f1010ece26165c7c89d2d 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c @@ -146,8 +146,8 @@ fprintf(fd, "// Process group with 2 CNs \n"); // Loop over BNs fprintf(fd," for (int i=0,j=0;i<M;i++,j+=2) {\n"); // First 16 LLRs of first CN - fprintf(fd," zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n"); - fprintf(fd," zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); + fprintf(fd," zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n"); + fprintf(fd," zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n"); // Loop over CNs for (k=1; k<2; k++) diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c index 07c9a4a4c347c607a456cf46e75f596dd1c1d674..c19bceca228ac16bcc1b851e6660333a2d7d536e 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c @@ -1,13 +1,10 @@ - - #include <stdint.h> #include <immintrin.h> #include "../../nrLDPCdecoder_defs.h" #include "../../nrLDPC_types.h" - void nrLDPC_bnProc_BG1_generator_AVX512(int R) { const char *ratestr[3]={"13","23","89"}; @@ -61,14 +58,7 @@ void nrLDPC_bnProc_BG1_generator_AVX512(int R) // Offset to each bit within a group in terms of 32 Byte uint32_t cnOffsetInGroup; uint8_t idxBnGroup = 0; - - - - fprintf(fd," __m512i* p_bnProcBuf; \n"); - fprintf(fd," __m512i* p_bnProcBufRes; \n"); - fprintf(fd," __m512i* p_llrRes; \n"); - fprintf(fd," __m512i* p_res; \n"); - fprintf(fd," uint32_t M, i; \n"); + fprintf(fd," uint32_t M, i; \n"); @@ -78,9 +68,6 @@ void nrLDPC_bnProc_BG1_generator_AVX512(int R) // ===================================================================== - - // ===================================================================== - fprintf(fd, "// Process group with 2 CNs \n"); if (lut_numBnInBnGroups[1] > 0) @@ -96,18 +83,14 @@ fprintf(fd, "// Process group with 2 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<2; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); - + // Loop over BNs - fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -136,17 +119,16 @@ fprintf(fd, "// Process group with 3 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + //fprintf(fd," ((__m512i*) bnProcBuf) = ((__m512i*) &bnProcBuf) [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + for (k=0; k<3; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); } @@ -173,17 +155,15 @@ fprintf(fd, "// Process group with 4 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + for (k=0; k<4; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); - + // Loop over BNs - fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",((lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup),(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), ((lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup)); fprintf(fd,"}\n"); } @@ -209,18 +189,15 @@ fprintf(fd, "// Process group with 4 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<5; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -248,18 +225,15 @@ fprintf(fd, "// Process group with 6 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<6; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -286,18 +260,15 @@ fprintf(fd, "// Process group with 7 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<7; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -324,18 +295,15 @@ fprintf(fd, "// Process group with 8 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<8; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -361,18 +329,15 @@ fprintf(fd, "// Process group with 9 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<9; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -398,18 +363,15 @@ fprintf(fd, "// Process group with 10 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<10; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -434,18 +396,15 @@ fprintf(fd, "// Process group with 11 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<11; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -470,18 +429,15 @@ fprintf(fd, "// Process group with 12 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<12; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -508,18 +464,15 @@ fprintf(fd, "// Process group with 13 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<13; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -547,18 +500,15 @@ fprintf(fd, "// Process group with 14 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<14; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -585,18 +535,15 @@ fprintf(fd, "// Process group with 15 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<15; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -624,18 +571,15 @@ fprintf(fd, "// Process group with 16 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<16; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -662,18 +606,15 @@ fprintf(fd, "// Process group with 17 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<17; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -700,18 +641,15 @@ fprintf(fd, "// Process group with 18 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<18; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -737,18 +675,15 @@ fprintf(fd, "// Process group with 19 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<19; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -775,18 +710,15 @@ fprintf(fd, "// Process group with 20 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<20; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -817,18 +749,15 @@ fprintf(fd, "// Process group with 21 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<21; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -853,18 +782,15 @@ fprintf(fd, "// Process group with 22 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<22; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -891,18 +817,15 @@ fprintf(fd, "// Process group with <23 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<23; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -930,18 +853,15 @@ fprintf(fd, "// Process group with 24 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<24; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -968,18 +888,15 @@ fprintf(fd, "// Process group with 25 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<25; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1008,18 +925,15 @@ fprintf(fd, "// Process group with 26 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<26; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1046,18 +960,15 @@ fprintf(fd, "// Process group with 27 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<27; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1084,18 +995,15 @@ fprintf(fd, "// Process group with 28 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<28; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1120,18 +1028,15 @@ fprintf(fd, "// Process group with 29 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<29; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1157,18 +1062,15 @@ fprintf(fd, "// Process group with 30 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<30; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c index ccd01d29a340c83d17a51b00ea40a16f72a9f57c..82f410bcdafcfa788008c369afc2d81d48773c58 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c @@ -1,5 +1,3 @@ - - #include <stdint.h> #include <immintrin.h> #include "../../nrLDPCdecoder_defs.h" @@ -50,21 +48,12 @@ void nrLDPC_bnProc_BG2_generator_AVX512(int R) else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();} - //uint32_t M; - //uint32_t M32rem; - // uint32_t i; + uint32_t k; // Offset to each bit within a group in terms of 32 Byte uint32_t cnOffsetInGroup; uint8_t idxBnGroup = 0; - - - - fprintf(fd," __m512i* p_bnProcBuf; \n"); - fprintf(fd," __m512i* p_bnProcBufRes; \n"); - fprintf(fd," __m512i* p_llrRes; \n"); - fprintf(fd," __m512i* p_res; \n"); - fprintf(fd," uint32_t M, i; \n"); + fprintf(fd," uint32_t M, i; \n"); @@ -74,9 +63,6 @@ void nrLDPC_bnProc_BG2_generator_AVX512(int R) // ===================================================================== - - // ===================================================================== - fprintf(fd, "// Process group with 2 CNs \n"); if (lut_numBnInBnGroups[1] > 0) @@ -92,18 +78,14 @@ fprintf(fd, "// Process group with 2 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<2; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); - + // Loop over BNs - fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -132,17 +114,16 @@ fprintf(fd, "// Process group with 3 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + //fprintf(fd," ((__m512i*) bnProcBuf) = ((__m512i*) &bnProcBuf) [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + for (k=0; k<3; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); } @@ -169,17 +150,15 @@ fprintf(fd, "// Process group with 4 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + for (k=0; k<4; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); - + // Loop over BNs - fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",((lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup),(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), ((lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup)); fprintf(fd,"}\n"); } @@ -205,18 +184,15 @@ fprintf(fd, "// Process group with 4 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<5; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -244,18 +220,15 @@ fprintf(fd, "// Process group with 6 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<6; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -282,18 +255,15 @@ fprintf(fd, "// Process group with 7 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<7; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -320,18 +290,15 @@ fprintf(fd, "// Process group with 8 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<8; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -357,18 +324,15 @@ fprintf(fd, "// Process group with 9 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<9; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -394,18 +358,15 @@ fprintf(fd, "// Process group with 10 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<10; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -430,18 +391,15 @@ fprintf(fd, "// Process group with 11 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<11; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -466,18 +424,15 @@ fprintf(fd, "// Process group with 12 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<12; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -504,18 +459,15 @@ fprintf(fd, "// Process group with 13 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<13; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -543,18 +495,15 @@ fprintf(fd, "// Process group with 14 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<14; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -581,18 +530,15 @@ fprintf(fd, "// Process group with 15 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<15; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -620,18 +566,15 @@ fprintf(fd, "// Process group with 16 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<16; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -658,18 +601,15 @@ fprintf(fd, "// Process group with 17 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<17; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -696,18 +636,15 @@ fprintf(fd, "// Process group with 18 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<18; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -733,18 +670,15 @@ fprintf(fd, "// Process group with 19 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<19; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -771,18 +705,15 @@ fprintf(fd, "// Process group with 20 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<20; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -813,18 +744,15 @@ fprintf(fd, "// Process group with 21 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<21; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -849,18 +777,15 @@ fprintf(fd, "// Process group with 22 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<22; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -887,18 +812,15 @@ fprintf(fd, "// Process group with <23 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<23; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -926,18 +848,15 @@ fprintf(fd, "// Process group with 24 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<24; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -964,18 +883,15 @@ fprintf(fd, "// Process group with 25 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<25; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1004,18 +920,15 @@ fprintf(fd, "// Process group with 26 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<26; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1042,18 +955,15 @@ fprintf(fd, "// Process group with 27 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<27; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1080,18 +990,15 @@ fprintf(fd, "// Process group with 28 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<28; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1116,18 +1023,15 @@ fprintf(fd, "// Process group with 29 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<29; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1153,18 +1057,15 @@ fprintf(fd, "// Process group with 30 CNs \n"); cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>6; // Set pointers to start of group 2 - fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); - + // Loop over CNs for (k=0; k<30; k++) { - fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); - fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + // Loop over BNs fprintf(fd," for (i=0;i<M;i++) {\n"); - fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + fprintf(fd," ((__m512i*)bnProcBufRes)[%d + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[%d + i ], ((__m512i*) bnProcBuf)[%d + i]);\n",(lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup,(lut_startAddrBnGroupsLlr[idxBnGroup]>>6), (lut_startAddrBnGroups[idxBnGroup]>>6)+ k*cnOffsetInGroup); fprintf(fd,"}\n"); @@ -1179,6 +1080,3 @@ fprintf(fd, "// Process group with 30 CNs \n"); - - - diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 index dabfabe18fdbb098b9ea9a0996079e0fde890445..86fe416a4a907302fb96ed46168e06f6ad0c6017 100755 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R13_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R13_AVX2.h index ada3706a6fe9f3a652e781d0e09998d37fb9de31..bd46948100cb7e055f4b184d6650dfef2fbd8b48 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R13_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R13_AVX2.h @@ -1,475 +1,281 @@ static inline void nrLDPC_bnProc_BG1_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m256i* p_bnProcBuf; - __m256i* p_bnProcBufRes; - __m256i* p_llrRes; - __m256i* p_res; uint32_t M, i; // Process group with 2 CNs // Process group with 3 CNs // Process group with 4 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [16128]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [16128]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[504 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[504 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[516 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[516 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[528 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[528 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[540 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[540 + i]); } // Process group with 5 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [17664]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [17664]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[552 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[552 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[564 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[564 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[576 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[576 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[588 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[588 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[600 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[600 + i]); } // Process group with 6 CNs M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [19584]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [19584]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[612 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[528 + i ], ((__m256i*) bnProcBuf)[612 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[636 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[528 + i ], ((__m256i*) bnProcBuf)[636 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[660 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[528 + i ], ((__m256i*) bnProcBuf)[660 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[684 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[528 + i ], ((__m256i*) bnProcBuf)[684 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[708 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[528 + i ], ((__m256i*) bnProcBuf)[708 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[732 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[528 + i ], ((__m256i*) bnProcBuf)[732 + i]); } // Process group with 7 CNs M = (4*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [24192]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [24192]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[756 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[756 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[804 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[804 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[852 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[852 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[900 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[900 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[948 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[948 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m256i*)bnProcBufRes)[996 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[996 + i]); } - p_res = &p_bnProcBufRes[288]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[288 + i]); + ((__m256i*)bnProcBufRes)[1044 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1044 + i]); } // Process group with 8 CNs M = (3*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [34944]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [34944]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1092 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1092 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1128 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1128 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1164 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1164 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1200 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1200 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1236 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1236 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[1272 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1272 + i]); } - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); + ((__m256i*)bnProcBufRes)[1308 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1308 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m256i*)bnProcBufRes)[1344 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1344 + i]); } // Process group with 9 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [44160]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [44160]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1380 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1380 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1392 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1392 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1404 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1404 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1416 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1416 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1428 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1428 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1440 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1440 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1452 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1452 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1464 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1464 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1476 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[636 + i ], ((__m256i*) bnProcBuf)[1476 + i]); } // Process group with 10 CNs M = (4*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [47616]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [47616]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1488 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1488 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1536 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1536 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1584 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1584 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1632 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1632 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[1680 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1680 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m256i*)bnProcBufRes)[1728 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1728 + i]); } - p_res = &p_bnProcBufRes[288]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[288 + i]); + ((__m256i*)bnProcBufRes)[1776 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1776 + i]); } - p_res = &p_bnProcBufRes[336]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[336 + i]); + ((__m256i*)bnProcBufRes)[1824 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1824 + i]); } - p_res = &p_bnProcBufRes[384]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[384 + i]); + ((__m256i*)bnProcBufRes)[1872 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1872 + i]); } - p_res = &p_bnProcBufRes[432]; - p_llrRes = (__m256i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[432 + i]); + ((__m256i*)bnProcBufRes)[1920 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[648 + i ], ((__m256i*) bnProcBuf)[1920 + i]); } // Process group with 11 CNs M = (3*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [62976]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [62976]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1968 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[1968 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[2004 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2004 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[2040 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2040 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[2076 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2076 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[2112 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2112 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[2148 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2148 + i]); } - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); + ((__m256i*)bnProcBufRes)[2184 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2184 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m256i*)bnProcBufRes)[2220 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2220 + i]); } - p_res = &p_bnProcBufRes[288]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[288 + i]); + ((__m256i*)bnProcBufRes)[2256 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2256 + i]); } - p_res = &p_bnProcBufRes[324]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[324 + i]); + ((__m256i*)bnProcBufRes)[2292 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2292 + i]); } - p_res = &p_bnProcBufRes[360]; - p_llrRes = (__m256i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[360 + i]); + ((__m256i*)bnProcBufRes)[2328 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[696 + i ], ((__m256i*) bnProcBuf)[2328 + i]); } // Process group with 12 CNs M = (4*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [75648]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [75648]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[2364 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2364 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[2412 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2412 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[2460 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2460 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[2508 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2508 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[2556 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2556 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m256i*)bnProcBufRes)[2604 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2604 + i]); } - p_res = &p_bnProcBufRes[288]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[288 + i]); + ((__m256i*)bnProcBufRes)[2652 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2652 + i]); } - p_res = &p_bnProcBufRes[336]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[336 + i]); + ((__m256i*)bnProcBufRes)[2700 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2700 + i]); } - p_res = &p_bnProcBufRes[384]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[384 + i]); + ((__m256i*)bnProcBufRes)[2748 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2748 + i]); } - p_res = &p_bnProcBufRes[432]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[432 + i]); + ((__m256i*)bnProcBufRes)[2796 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2796 + i]); } - p_res = &p_bnProcBufRes[480]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[480 + i]); + ((__m256i*)bnProcBufRes)[2844 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2844 + i]); } - p_res = &p_bnProcBufRes[528]; - p_llrRes = (__m256i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[528 + i]); + ((__m256i*)bnProcBufRes)[2892 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[732 + i ], ((__m256i*) bnProcBuf)[2892 + i]); } // Process group with 13 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [94080]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [94080]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[2940 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[2940 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[2952 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[2952 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[2964 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[2964 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[2976 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[2976 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[2988 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[2988 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[3000 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[3000 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[3012 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[3012 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[3024 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[3024 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[3036 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[3036 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[3048 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[3048 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[3060 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[3060 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[3072 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[3072 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[3084 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[780 + i ], ((__m256i*) bnProcBuf)[3084 + i]); } // Process group with 14 CNs // Process group with 15 CNs @@ -487,301 +293,181 @@ static inline void nrLDPC_bnProc_BG1_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBu // Process group with 27 CNs // Process group with 28 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [99072]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [99072]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[3096 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3096 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[3108 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3108 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[3120 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3120 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[3132 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3132 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[3144 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3144 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[3156 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3156 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[3168 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3168 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[3180 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3180 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[3192 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3192 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[3204 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3204 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[3216 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3216 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[3228 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3228 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[3240 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3240 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m256i*)bnProcBufRes)[3252 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3252 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m256i*)bnProcBufRes)[3264 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3264 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[3276 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3276 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[3288 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3288 + i]); } - p_res = &p_bnProcBufRes[204]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]); + ((__m256i*)bnProcBufRes)[3300 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3300 + i]); } - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); + ((__m256i*)bnProcBufRes)[3312 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3312 + i]); } - p_res = &p_bnProcBufRes[228]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]); + ((__m256i*)bnProcBufRes)[3324 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3324 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m256i*)bnProcBufRes)[3336 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3336 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m256i*)bnProcBufRes)[3348 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3348 + i]); } - p_res = &p_bnProcBufRes[264]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]); + ((__m256i*)bnProcBufRes)[3360 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3360 + i]); } - p_res = &p_bnProcBufRes[276]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[276 + i]); + ((__m256i*)bnProcBufRes)[3372 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3372 + i]); } - p_res = &p_bnProcBufRes[288]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[288 + i]); + ((__m256i*)bnProcBufRes)[3384 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3384 + i]); } - p_res = &p_bnProcBufRes[300]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[300 + i]); + ((__m256i*)bnProcBufRes)[3396 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3396 + i]); } - p_res = &p_bnProcBufRes[312]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[312 + i]); + ((__m256i*)bnProcBufRes)[3408 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3408 + i]); } - p_res = &p_bnProcBufRes[324]; - p_llrRes = (__m256i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[324 + i]); + ((__m256i*)bnProcBufRes)[3420 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[792 + i ], ((__m256i*) bnProcBuf)[3420 + i]); } // Process group with 29 CNs // Process group with 30 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [109824]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [109824]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[3432 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3432 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[3444 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3444 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[3456 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3456 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[3468 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3468 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[3480 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3480 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[3492 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3492 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[3504 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3504 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[3516 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3516 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[3528 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3528 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[3540 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3540 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[3552 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3552 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[3564 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3564 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[3576 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3576 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m256i*)bnProcBufRes)[3588 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3588 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m256i*)bnProcBufRes)[3600 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3600 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[3612 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3612 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[3624 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3624 + i]); } - p_res = &p_bnProcBufRes[204]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]); + ((__m256i*)bnProcBufRes)[3636 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3636 + i]); } - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); + ((__m256i*)bnProcBufRes)[3648 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3648 + i]); } - p_res = &p_bnProcBufRes[228]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]); + ((__m256i*)bnProcBufRes)[3660 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3660 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m256i*)bnProcBufRes)[3672 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3672 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m256i*)bnProcBufRes)[3684 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3684 + i]); } - p_res = &p_bnProcBufRes[264]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]); + ((__m256i*)bnProcBufRes)[3696 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3696 + i]); } - p_res = &p_bnProcBufRes[276]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[276 + i]); + ((__m256i*)bnProcBufRes)[3708 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3708 + i]); } - p_res = &p_bnProcBufRes[288]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[288 + i]); + ((__m256i*)bnProcBufRes)[3720 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3720 + i]); } - p_res = &p_bnProcBufRes[300]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[300 + i]); + ((__m256i*)bnProcBufRes)[3732 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3732 + i]); } - p_res = &p_bnProcBufRes[312]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[312 + i]); + ((__m256i*)bnProcBufRes)[3744 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3744 + i]); } - p_res = &p_bnProcBufRes[324]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[324 + i]); + ((__m256i*)bnProcBufRes)[3756 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3756 + i]); } - p_res = &p_bnProcBufRes[336]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[336 + i]); + ((__m256i*)bnProcBufRes)[3768 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3768 + i]); } - p_res = &p_bnProcBufRes[348]; - p_llrRes = (__m256i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[348 + i]); + ((__m256i*)bnProcBufRes)[3780 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[804 + i ], ((__m256i*) bnProcBuf)[3780 + i]); } } diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R23_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R23_AVX2.h index 06e4b4901c6b7613c0e9b687c5ed0486431ee09d..feb842fcee650d6f1f8675435aeaccd33abdb485 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R23_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R23_AVX2.h @@ -1,128 +1,74 @@ static inline void nrLDPC_bnProc_BG1_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m256i* p_bnProcBuf; - __m256i* p_bnProcBufRes; - __m256i* p_llrRes; - __m256i* p_res; uint32_t M, i; // Process group with 2 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [3456]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [3456]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [3456]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[108 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[108 + i ], ((__m256i*) bnProcBuf)[108 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [3456]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[120 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[108 + i ], ((__m256i*) bnProcBuf)[120 + i]); } // Process group with 3 CNs M = (5*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [4224]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [4224]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [3840]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[132 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[120 + i ], ((__m256i*) bnProcBuf)[132 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [3840]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[192 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[120 + i ], ((__m256i*) bnProcBuf)[192 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [3840]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[252 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[120 + i ], ((__m256i*) bnProcBuf)[252 + i]); } // Process group with 4 CNs M = (3*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [9984]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [9984]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [5760]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[312 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[180 + i ], ((__m256i*) bnProcBuf)[312 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [5760]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[348 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[180 + i ], ((__m256i*) bnProcBuf)[348 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [5760]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[384 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[180 + i ], ((__m256i*) bnProcBuf)[384 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [5760]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[420 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[180 + i ], ((__m256i*) bnProcBuf)[420 + i]); } // Process group with 5 CNs M = (7*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [14592]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [14592]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[456 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[216 + i ], ((__m256i*) bnProcBuf)[456 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[540 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[216 + i ], ((__m256i*) bnProcBuf)[540 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m256i*)bnProcBufRes)[624 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[216 + i ], ((__m256i*) bnProcBuf)[624 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m256i*)bnProcBufRes)[708 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[216 + i ], ((__m256i*) bnProcBuf)[708 + i]); } - p_res = &p_bnProcBufRes[336]; - p_llrRes = (__m256i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[336 + i]); + ((__m256i*)bnProcBufRes)[792 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[216 + i ], ((__m256i*) bnProcBuf)[792 + i]); } // Process group with 6 CNs M = (8*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [28032]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [28032]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[876 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[876 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[972 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[972 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[1068 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[1068 + i]); } - p_res = &p_bnProcBufRes[288]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[288 + i]); + ((__m256i*)bnProcBufRes)[1164 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[1164 + i]); } - p_res = &p_bnProcBufRes[384]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[384 + i]); + ((__m256i*)bnProcBufRes)[1260 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[1260 + i]); } - p_res = &p_bnProcBufRes[480]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[480 + i]); + ((__m256i*)bnProcBufRes)[1356 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[1356 + i]); } // Process group with 7 CNs // Process group with 8 CNs @@ -130,126 +76,76 @@ static inline void nrLDPC_bnProc_BG1_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBu // Process group with 10 CNs // Process group with 11 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [46464]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [46464]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1452 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1452 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1464 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1464 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1476 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1476 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1488 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1488 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1500 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1500 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1512 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1512 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1524 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1524 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1536 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1536 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1548 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1548 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1560 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1560 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1572 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[396 + i ], ((__m256i*) bnProcBuf)[1572 + i]); } // Process group with 12 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [50688]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [50688]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1584 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1584 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1596 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1596 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1608 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1608 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1620 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1620 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1632 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1632 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1644 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1644 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1656 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1656 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1668 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1668 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1680 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1680 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1692 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1692 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1704 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1704 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1716 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[408 + i ], ((__m256i*) bnProcBuf)[1716 + i]); } // Process group with 13 CNs // Process group with 14 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R89_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R89_AVX2.h index 1b051689dffbc6782bd8c47b9e066907380be9fe..9eaf753bc12e291d00704d411933309ed471038f 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R89_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R89_AVX2.h @@ -1,94 +1,54 @@ static inline void nrLDPC_bnProc_BG1_R89_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m256i* p_bnProcBuf; - __m256i* p_bnProcBufRes; - __m256i* p_llrRes; - __m256i* p_res; uint32_t M, i; // Process group with 2 CNs M = (3*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [384]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [384]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [384]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[12 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[12 + i ], ((__m256i*) bnProcBuf)[12 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [384]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[48 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[12 + i ], ((__m256i*) bnProcBuf)[48 + i]); } // Process group with 3 CNs M = (21*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [2688]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [2688]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [1536]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[84 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[48 + i ], ((__m256i*) bnProcBuf)[84 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [1536]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m256i*)bnProcBufRes)[336 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[48 + i ], ((__m256i*) bnProcBuf)[336 + i]); } - p_res = &p_bnProcBufRes[504]; - p_llrRes = (__m256i*) &llrRes [1536]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[504 + i]); + ((__m256i*)bnProcBufRes)[588 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[48 + i ], ((__m256i*) bnProcBuf)[588 + i]); } // Process group with 4 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [26880]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [26880]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[840 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[840 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[852 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[852 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[864 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[864 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[876 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[300 + i ], ((__m256i*) bnProcBuf)[876 + i]); } // Process group with 5 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [28416]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [28416]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[888 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[312 + i ], ((__m256i*) bnProcBuf)[888 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[900 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[312 + i ], ((__m256i*) bnProcBuf)[900 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[912 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[312 + i ], ((__m256i*) bnProcBuf)[912 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[924 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[312 + i ], ((__m256i*) bnProcBuf)[924 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[936 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[312 + i ], ((__m256i*) bnProcBuf)[936 + i]); } // Process group with 6 CNs // Process group with 7 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.c index 3f1c421cebdee768cc056933786826f33ddf9fe6..b5e9d947c058b3a0431fd93fda4f8ad2390d5f53 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.c @@ -1,5 +1,3 @@ -#include <stdint.h> -#include <immintrin.h> void nrLDPC_bnProc_BG2_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { __m256i* p_bnProcBuf; __m256i* p_bnProcBufRes; @@ -8,188 +6,112 @@ void nrLDPC_bnProc_BG2_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* uint32_t M, i; // Process group with 2 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [6912]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [6912]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[216 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[216 + i ], ((__m256i*) bnProcBuf)[216 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[228 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[216 + i ], ((__m256i*) bnProcBuf)[228 + i]); } // Process group with 3 CNs // Process group with 4 CNs M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [7680]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [7680]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [7296]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[240 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[228 + i ], ((__m256i*) bnProcBuf)[240 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [7296]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[264 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[228 + i ], ((__m256i*) bnProcBuf)[264 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [7296]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[288 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[228 + i ], ((__m256i*) bnProcBuf)[288 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [7296]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[312 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[228 + i ], ((__m256i*) bnProcBuf)[312 + i]); } // Process group with 5 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [10752]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [10752]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[336 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[252 + i ], ((__m256i*) bnProcBuf)[336 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[348 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[252 + i ], ((__m256i*) bnProcBuf)[348 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[360 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[252 + i ], ((__m256i*) bnProcBuf)[360 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[372 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[252 + i ], ((__m256i*) bnProcBuf)[372 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[384 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[252 + i ], ((__m256i*) bnProcBuf)[384 + i]); } // Process group with 6 CNs M = (5*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [12672]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [12672]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[396 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[264 + i ], ((__m256i*) bnProcBuf)[396 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[456 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[264 + i ], ((__m256i*) bnProcBuf)[456 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[516 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[264 + i ], ((__m256i*) bnProcBuf)[516 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[576 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[264 + i ], ((__m256i*) bnProcBuf)[576 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m256i*)bnProcBufRes)[636 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[264 + i ], ((__m256i*) bnProcBuf)[636 + i]); } - p_res = &p_bnProcBufRes[300]; - p_llrRes = (__m256i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[300 + i]); + ((__m256i*)bnProcBufRes)[696 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[264 + i ], ((__m256i*) bnProcBuf)[696 + i]); } // Process group with 7 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [24192]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [24192]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[756 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[324 + i ], ((__m256i*) bnProcBuf)[756 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[768 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[324 + i ], ((__m256i*) bnProcBuf)[768 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[780 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[324 + i ], ((__m256i*) bnProcBuf)[780 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[792 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[324 + i ], ((__m256i*) bnProcBuf)[792 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[804 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[324 + i ], ((__m256i*) bnProcBuf)[804 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[816 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[324 + i ], ((__m256i*) bnProcBuf)[816 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[828 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[324 + i ], ((__m256i*) bnProcBuf)[828 + i]); } // Process group with 8 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [26880]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [26880]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[840 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[336 + i ], ((__m256i*) bnProcBuf)[840 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[852 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[336 + i ], ((__m256i*) bnProcBuf)[852 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[864 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[336 + i ], ((__m256i*) bnProcBuf)[864 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[876 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[336 + i ], ((__m256i*) bnProcBuf)[876 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[888 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[336 + i ], ((__m256i*) bnProcBuf)[888 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[900 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[336 + i ], ((__m256i*) bnProcBuf)[900 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[912 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[336 + i ], ((__m256i*) bnProcBuf)[912 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[924 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[336 + i ], ((__m256i*) bnProcBuf)[924 + i]); } // Process group with 9 CNs // Process group with 10 CNs @@ -197,231 +119,139 @@ void nrLDPC_bnProc_BG2_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* // Process group with 12 CNs // Process group with 13 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [29952]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [29952]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[936 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[936 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[948 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[948 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[960 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[960 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[972 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[972 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[984 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[984 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[996 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[996 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1008 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[1008 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1020 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[1020 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1032 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[1032 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1044 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[1044 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1056 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[1056 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1068 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[1068 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1080 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[348 + i ], ((__m256i*) bnProcBuf)[1080 + i]); } // Process group with 14 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [34944]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [34944]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1092 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1092 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1104 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1104 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1116 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1116 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1128 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1128 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1140 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1140 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1152 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1152 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1164 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1164 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1176 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1176 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1188 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1188 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1200 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1200 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1212 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1212 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1224 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1224 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1236 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1236 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m256i*)bnProcBufRes)[1248 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[360 + i ], ((__m256i*) bnProcBuf)[1248 + i]); } // Process group with 15 CNs // Process group with 16 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [40320]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [40320]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1260 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1260 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1272 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1272 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1284 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1284 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1296 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1296 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1308 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1308 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1320 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1320 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1332 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1332 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1344 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1344 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1356 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1356 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1368 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1368 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1380 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1380 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1392 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1392 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1404 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1404 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m256i*)bnProcBufRes)[1416 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1416 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m256i*)bnProcBufRes)[1428 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1428 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[1440 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[372 + i ], ((__m256i*) bnProcBuf)[1440 + i]); } // Process group with 17 CNs // Process group with 18 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c index f649102df7a8cde518242a39292f8180778425e4..b8232a334ac791432c3705c0626c8f7a7015cce8 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c @@ -1,5 +1,3 @@ -#include <stdint.h> -#include <immintrin.h> void nrLDPC_bnProc_BG2_R15_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { __m256i* p_bnProcBuf; __m256i* p_bnProcBufRes; @@ -11,545 +9,325 @@ void nrLDPC_bnProc_BG2_R15_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* // Process group with 4 CNs // Process group with 5 CNs M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [14592]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [14592]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[456 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[456 + i ], ((__m256i*) bnProcBuf)[456 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[480 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[456 + i ], ((__m256i*) bnProcBuf)[480 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[504 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[456 + i ], ((__m256i*) bnProcBuf)[504 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[528 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[456 + i ], ((__m256i*) bnProcBuf)[528 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[552 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[456 + i ], ((__m256i*) bnProcBuf)[552 + i]); } // Process group with 6 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [18432]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [18432]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[576 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[480 + i ], ((__m256i*) bnProcBuf)[576 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[588 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[480 + i ], ((__m256i*) bnProcBuf)[588 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[600 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[480 + i ], ((__m256i*) bnProcBuf)[600 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[612 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[480 + i ], ((__m256i*) bnProcBuf)[612 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[624 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[480 + i ], ((__m256i*) bnProcBuf)[624 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[636 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[480 + i ], ((__m256i*) bnProcBuf)[636 + i]); } // Process group with 7 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [20736]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [20736]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[648 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[492 + i ], ((__m256i*) bnProcBuf)[648 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[660 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[492 + i ], ((__m256i*) bnProcBuf)[660 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[672 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[492 + i ], ((__m256i*) bnProcBuf)[672 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[684 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[492 + i ], ((__m256i*) bnProcBuf)[684 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[696 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[492 + i ], ((__m256i*) bnProcBuf)[696 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[708 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[492 + i ], ((__m256i*) bnProcBuf)[708 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[720 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[492 + i ], ((__m256i*) bnProcBuf)[720 + i]); } // Process group with 8 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [23424]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [23424]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[732 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[732 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[744 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[744 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[756 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[756 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[768 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[768 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[780 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[780 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[792 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[792 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[804 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[804 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[816 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[504 + i ], ((__m256i*) bnProcBuf)[816 + i]); } // Process group with 9 CNs M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [26496]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [26496]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[828 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[828 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[852 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[852 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[876 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[876 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[900 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[900 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[924 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[924 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[948 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[948 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[972 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[972 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m256i*)bnProcBufRes)[996 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[996 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[1020 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[516 + i ], ((__m256i*) bnProcBuf)[1020 + i]); } // Process group with 10 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [33408]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [33408]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1044 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1044 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1056 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1056 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1068 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1068 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1080 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1080 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1092 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1092 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1104 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1104 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1116 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1116 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1128 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1128 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1140 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1140 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1152 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[540 + i ], ((__m256i*) bnProcBuf)[1152 + i]); } // Process group with 11 CNs // Process group with 12 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [37248]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [37248]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1164 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1164 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1176 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1176 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1188 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1188 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1200 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1200 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1212 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1212 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1224 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1224 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1236 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1236 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1248 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1248 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1260 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1260 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1272 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1272 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1284 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1284 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1296 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[552 + i ], ((__m256i*) bnProcBuf)[1296 + i]); } // Process group with 13 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [41856]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [41856]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1308 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1308 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1320 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1320 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1332 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1332 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1344 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1344 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1356 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1356 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1368 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1368 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1380 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1380 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1392 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1392 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1404 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1404 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1416 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1416 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1428 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1428 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1440 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1440 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1452 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[564 + i ], ((__m256i*) bnProcBuf)[1452 + i]); } // Process group with 14 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [46848]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [46848]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1464 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1464 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1476 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1476 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1488 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1488 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1500 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1500 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1512 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1512 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1524 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1524 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1536 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1536 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1548 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1548 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1560 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1560 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1572 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1572 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1584 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1584 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1596 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1596 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1608 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1608 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m256i*)bnProcBufRes)[1620 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[576 + i ], ((__m256i*) bnProcBuf)[1620 + i]); } // Process group with 15 CNs // Process group with 16 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [52224]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [52224]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1632 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1632 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1644 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1644 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1656 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1656 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1668 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1668 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1680 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1680 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1692 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1692 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1704 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1704 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1716 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1716 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1728 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1728 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1740 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1740 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1752 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1752 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1764 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1764 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1776 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1776 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m256i*)bnProcBufRes)[1788 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1788 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m256i*)bnProcBufRes)[1800 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1800 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[1812 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[588 + i ], ((__m256i*) bnProcBuf)[1812 + i]); } // Process group with 17 CNs // Process group with 18 CNs @@ -558,236 +336,142 @@ void nrLDPC_bnProc_BG2_R15_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* // Process group with 21 CNs // Process group with 22 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [58368]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [58368]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[1824 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1824 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[1836 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1836 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[1848 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1848 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[1860 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1860 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[1872 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1872 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[1884 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1884 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[1896 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1896 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[1908 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1908 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[1920 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1920 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[1932 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1932 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[1944 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1944 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[1956 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1956 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[1968 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1968 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m256i*)bnProcBufRes)[1980 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1980 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m256i*)bnProcBufRes)[1992 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[1992 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[2004 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[2004 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[2016 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[2016 + i]); } - p_res = &p_bnProcBufRes[204]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]); + ((__m256i*)bnProcBufRes)[2028 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[2028 + i]); } - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); + ((__m256i*)bnProcBufRes)[2040 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[2040 + i]); } - p_res = &p_bnProcBufRes[228]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]); + ((__m256i*)bnProcBufRes)[2052 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[2052 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m256i*)bnProcBufRes)[2064 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[2064 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m256i*)bnProcBufRes)[2076 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[600 + i ], ((__m256i*) bnProcBuf)[2076 + i]); } // Process group with <23 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [66816]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [66816]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[2088 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2088 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[2100 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2100 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[2112 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2112 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[2124 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2124 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[2136 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2136 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[2148 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2148 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[2160 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2160 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m256i*)bnProcBufRes)[2172 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2172 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[2184 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2184 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[2196 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2196 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[2208 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2208 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m256i*)bnProcBufRes)[2220 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2220 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m256i*)bnProcBufRes)[2232 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2232 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m256i*)bnProcBufRes)[2244 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2244 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m256i*)bnProcBufRes)[2256 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2256 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m256i*)bnProcBufRes)[2268 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2268 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m256i*)bnProcBufRes)[2280 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2280 + i]); } - p_res = &p_bnProcBufRes[204]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]); + ((__m256i*)bnProcBufRes)[2292 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2292 + i]); } - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); + ((__m256i*)bnProcBufRes)[2304 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2304 + i]); } - p_res = &p_bnProcBufRes[228]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]); + ((__m256i*)bnProcBufRes)[2316 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2316 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m256i*)bnProcBufRes)[2328 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2328 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m256i*)bnProcBufRes)[2340 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2340 + i]); } - p_res = &p_bnProcBufRes[264]; - p_llrRes = (__m256i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]); + ((__m256i*)bnProcBufRes)[2352 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[612 + i ], ((__m256i*) bnProcBuf)[2352 + i]); } // Process group with 24 CNs // Process group with 25 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c index 59a1613099c8da476887c855153e5a4b2ebdd575..be6b03b294d3cd2ed4bce0d7641eb1acb06bb271 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c @@ -1,5 +1,3 @@ -#include <stdint.h> -#include <immintrin.h> void nrLDPC_bnProc_BG2_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { __m256i* p_bnProcBuf; __m256i* p_bnProcBufRes; @@ -8,123 +6,73 @@ void nrLDPC_bnProc_BG2_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* uint32_t M, i; // Process group with 2 CNs M = (3*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [1152]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [1152]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [1152]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[36 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[36 + i ], ((__m256i*) bnProcBuf)[36 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [1152]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[72 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[36 + i ], ((__m256i*) bnProcBuf)[72 + i]); } // Process group with 3 CNs M = (5*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [3456]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [3456]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [2304]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[108 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[72 + i ], ((__m256i*) bnProcBuf)[108 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [2304]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[168 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[72 + i ], ((__m256i*) bnProcBuf)[168 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [2304]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m256i*)bnProcBufRes)[228 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[72 + i ], ((__m256i*) bnProcBuf)[228 + i]); } // Process group with 4 CNs M = (3*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [9216]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [9216]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [4224]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[288 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[132 + i ], ((__m256i*) bnProcBuf)[288 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [4224]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[324 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[132 + i ], ((__m256i*) bnProcBuf)[324 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [4224]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[360 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[132 + i ], ((__m256i*) bnProcBuf)[360 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [4224]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m256i*)bnProcBufRes)[396 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[132 + i ], ((__m256i*) bnProcBuf)[396 + i]); } // Process group with 5 CNs M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [13824]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [13824]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[432 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[432 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[456 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[456 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[480 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[480 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m256i*)bnProcBufRes)[504 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[504 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m256i*)bnProcBufRes)[528 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[168 + i ], ((__m256i*) bnProcBuf)[528 + i]); } // Process group with 6 CNs M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [17664]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [17664]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m256i*)bnProcBufRes)[552 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[552 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m256i*)bnProcBufRes)[564 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[564 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m256i*)bnProcBufRes)[576 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[576 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m256i*)bnProcBufRes)[588 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[588 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m256i*)bnProcBufRes)[600 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[600 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m256i*)bnProcBufRes)[612 + i ] = _mm256_subs_epi8(((__m256i*)llrRes)[192 + i ], ((__m256i*) bnProcBuf)[612 + i]); } // Process group with 7 CNs // Process group with 8 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R13_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R13_AVX2.h index b10a842171b778f7d4323b6e1ad35ac6deb01b95..eae3f76ea9ac3d61cd2376b410ec95e6b68d0477 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R13_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R13_AVX2.h @@ -1,4 +1,4 @@ -static inline void nrLDPC_bnProcPc_BG1_R13_AVX2(int8_t* bnProcBuf,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { +static inline void nrLDPC_bnProcPc_BG1_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { __m256i ymm0, ymm1, ymmRes0, ymmRes1; __m128i* p_bnProcBuf; __m128i* p_llrProcBuf; diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R23_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R23_AVX2.h index 8647e8cde6a758fa7e38dbbda53184d7559cfc38..0e0ded9a681d3a82798ac4587b2e15f3f5db0dc7 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R23_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R23_AVX2.h @@ -1,4 +1,4 @@ -static inline void nrLDPC_bnProcPc_BG1_R23_AVX2(int8_t* bnProcBuf,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { +static inline void nrLDPC_bnProcPc_BG1_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { __m256i ymm0, ymm1, ymmRes0, ymmRes1; __m128i* p_bnProcBuf; __m128i* p_llrProcBuf; @@ -11,8 +11,8 @@ static inline void nrLDPC_bnProcPc_BG1_R23_AVX2(int8_t* bnProcBuf,int8_t* llrRes p_llrProcBuf = (__m128i*) &llrProcBuf [3456]; p_llrRes = (__m256i*) &llrRes [3456]; for (int i=0,j=0;i<M;i++,j+=2) { - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]); + ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); + ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]); ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[24 + j]); ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[24 + j +1]); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R89_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R89_AVX2.h index aee271db84d036d34c317d326ede442d84b51056..b971f62cc3e9e3028b93b5c27d28f95e0a480c86 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R89_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R89_AVX2.h @@ -1,4 +1,4 @@ -static inline void nrLDPC_bnProcPc_BG1_R89_AVX2(int8_t* bnProcBuf,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { +static inline void nrLDPC_bnProcPc_BG1_R89_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { __m256i ymm0, ymm1, ymmRes0, ymmRes1; __m128i* p_bnProcBuf; __m128i* p_llrProcBuf; @@ -11,8 +11,8 @@ static inline void nrLDPC_bnProcPc_BG1_R89_AVX2(int8_t* bnProcBuf,int8_t* llrRes p_llrProcBuf = (__m128i*) &llrProcBuf [384]; p_llrRes = (__m256i*) &llrRes [384]; for (int i=0,j=0;i<M;i++,j+=2) { - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]); + ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); + ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]); ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j]); ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j +1]); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R13_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R13_AVX2.h index c4268cd7af3714e6faed55ddb6aefd54d015bcf9..d2b135f732db25679458b9a0dc87166b3286502e 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R13_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R13_AVX2.h @@ -1,32 +1,18 @@ -static inline void nrLDPC_bnProcPc_BG2_R13_AVX2(int8_t* bnProcBuf,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { +static inline void nrLDPC_bnProcPc_BG2_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { __m256i ymm0, ymm1, ymmRes0, ymmRes1; __m128i* p_bnProcBuf; __m128i* p_llrProcBuf; __m256i* p_llrRes; uint32_t M ; // Process group with 1 CNs - M = (18*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [0]; - p_llrProcBuf = (__m128i*) &llrProcBuf [0]; - p_llrRes = (__m256i*) &llrRes [0]; - for (int i=0,j=0;i<M;i++,j+=2) { - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8); -} // Process group with 2 CNs M = (1*Z + 31)>>5; p_bnProcBuf = (__m128i*) &bnProcBuf [6912]; p_llrProcBuf = (__m128i*) &llrProcBuf [6912]; p_llrRes = (__m256i*) &llrRes [6912]; for (int i=0,j=0;i<M;i++,j+=2) { - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]); + ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); + ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]); ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[24 + j]); ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[24 + j +1]); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R15_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R15_AVX2.h index 3a43ebe6b597c1c61f7d51df9cad22d7b3a5d6f1..5598eba32ca2282b23d0dd2af2cdd4fb8cc29a9d 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R15_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R15_AVX2.h @@ -1,24 +1,10 @@ -static inline void nrLDPC_bnProcPc_BG2_R15_AVX2(int8_t* bnProcBuf,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { +static inline void nrLDPC_bnProcPc_BG2_R15_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { __m256i ymm0, ymm1, ymmRes0, ymmRes1; __m128i* p_bnProcBuf; __m128i* p_llrProcBuf; __m256i* p_llrRes; uint32_t M ; // Process group with 1 CNs - M = (38*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [0]; - p_llrProcBuf = (__m128i*) &llrProcBuf [0]; - p_llrRes = (__m256i*) &llrRes [0]; - for (int i=0,j=0;i<M;i++,j+=2) { - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8); -} // Process group with 2 CNs // Process group with 3 CNs // Process group with 4 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R23_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R23_AVX2.h index aec88387610a86321abc9d4d1797ae7f301417cd..3ba536086d9acc97ece17c2092705ed9802b5f47 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R23_AVX2.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R23_AVX2.h @@ -1,32 +1,18 @@ -static inline void nrLDPC_bnProcPc_BG2_R23_AVX2(int8_t* bnProcBuf,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { +static inline void nrLDPC_bnProcPc_BG2_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { __m256i ymm0, ymm1, ymmRes0, ymmRes1; __m128i* p_bnProcBuf; __m128i* p_llrProcBuf; __m256i* p_llrRes; uint32_t M ; // Process group with 1 CNs - M = (3*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [0]; - p_llrProcBuf = (__m128i*) &llrProcBuf [0]; - p_llrRes = (__m256i*) &llrRes [0]; - for (int i=0,j=0;i<M;i++,j+=2) { - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[j+1]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8); -} // Process group with 2 CNs M = (3*Z + 31)>>5; p_bnProcBuf = (__m128i*) &bnProcBuf [1152]; p_llrProcBuf = (__m128i*) &llrProcBuf [1152]; p_llrRes = (__m256i*) &llrRes [1152]; for (int i=0,j=0;i<M;i++,j+=2) { - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]); + ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); + ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]); ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j]); ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j +1]); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h index d9738fd57aa6a6fd9268ddaed68cf45d43ad5fca..c96ae199e7852eb28cac06829844ac771fb4bd32 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h @@ -1,475 +1,281 @@ static inline void nrLDPC_bnProc_BG1_R13_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m512i* p_bnProcBuf; - __m512i* p_bnProcBufRes; - __m512i* p_llrRes; - __m512i* p_res; uint32_t M, i; // Process group with 2 CNs // Process group with 3 CNs // Process group with 4 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [16128]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [16128]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[252 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[252 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[258 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[258 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[264 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[264 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[270 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[270 + i]); } // Process group with 5 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [17664]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [17664]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[276 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[276 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[282 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[282 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[288 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[288 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[294 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[294 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[300 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[300 + i]); } // Process group with 6 CNs M = (2*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [19584]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [19584]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[306 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[264 + i ], ((__m512i*) bnProcBuf)[306 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[318 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[264 + i ], ((__m512i*) bnProcBuf)[318 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[330 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[264 + i ], ((__m512i*) bnProcBuf)[330 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[342 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[264 + i ], ((__m512i*) bnProcBuf)[342 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[354 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[264 + i ], ((__m512i*) bnProcBuf)[354 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [16896]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[366 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[264 + i ], ((__m512i*) bnProcBuf)[366 + i]); } // Process group with 7 CNs M = (4*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [24192]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [24192]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[378 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[378 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[402 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[402 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[426 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[426 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[450 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[450 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[474 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[474 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m512i*)bnProcBufRes)[498 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[498 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m512i*)bnProcBufRes)[522 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[522 + i]); } // Process group with 8 CNs M = (3*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [34944]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [34944]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[546 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[546 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[564 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[564 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[582 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[582 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[600 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[600 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[618 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[618 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[636 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[636 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m512i*)bnProcBufRes)[654 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[654 + i]); } - p_res = &p_bnProcBufRes[126]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); + ((__m512i*)bnProcBufRes)[672 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[672 + i]); } // Process group with 9 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [44160]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [44160]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[690 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[690 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[696 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[696 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[702 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[702 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[708 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[708 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[714 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[714 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[720 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[720 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[726 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[726 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[732 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[732 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [20352]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[738 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[318 + i ], ((__m512i*) bnProcBuf)[738 + i]); } // Process group with 10 CNs M = (4*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [47616]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [47616]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[744 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[744 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[768 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[768 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[792 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[792 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[816 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[816 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[840 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[840 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m512i*)bnProcBufRes)[864 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[864 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m512i*)bnProcBufRes)[888 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[888 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m512i*)bnProcBufRes)[912 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[912 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m512i*)bnProcBufRes)[936 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[936 + i]); } - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m512i*) &llrRes [20736]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); + ((__m512i*)bnProcBufRes)[960 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[324 + i ], ((__m512i*) bnProcBuf)[960 + i]); } // Process group with 11 CNs M = (3*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [62976]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [62976]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[984 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[984 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[1002 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1002 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[1020 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1020 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[1038 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1038 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[1056 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1056 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[1074 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1074 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m512i*)bnProcBufRes)[1092 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1092 + i]); } - p_res = &p_bnProcBufRes[126]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); + ((__m512i*)bnProcBufRes)[1110 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1110 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m512i*)bnProcBufRes)[1128 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1128 + i]); } - p_res = &p_bnProcBufRes[162]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]); + ((__m512i*)bnProcBufRes)[1146 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1146 + i]); } - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m512i*) &llrRes [22272]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); + ((__m512i*)bnProcBufRes)[1164 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[348 + i ], ((__m512i*) bnProcBuf)[1164 + i]); } // Process group with 12 CNs M = (4*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [75648]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [75648]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[1182 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1182 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[1206 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1206 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[1230 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1230 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[1254 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1254 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[1278 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1278 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m512i*)bnProcBufRes)[1302 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1302 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m512i*)bnProcBufRes)[1326 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1326 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m512i*)bnProcBufRes)[1350 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1350 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m512i*)bnProcBufRes)[1374 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1374 + i]); } - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); + ((__m512i*)bnProcBufRes)[1398 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1398 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m512i*)bnProcBufRes)[1422 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1422 + i]); } - p_res = &p_bnProcBufRes[264]; - p_llrRes = (__m512i*) &llrRes [23424]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]); + ((__m512i*)bnProcBufRes)[1446 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[366 + i ], ((__m512i*) bnProcBuf)[1446 + i]); } // Process group with 13 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [94080]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [94080]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[1470 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1470 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[1476 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1476 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[1482 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1482 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[1488 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1488 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[1494 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1494 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[1500 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1500 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[1506 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1506 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[1512 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1512 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[1518 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1518 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[1524 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1524 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[1530 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1530 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[1536 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1536 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [24960]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[1542 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[390 + i ], ((__m512i*) bnProcBuf)[1542 + i]); } // Process group with 14 CNs // Process group with 15 CNs @@ -487,301 +293,181 @@ static inline void nrLDPC_bnProc_BG1_R13_AVX512(int8_t* bnProcBuf,int8_t* bnProc // Process group with 27 CNs // Process group with 28 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [99072]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [99072]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[1548 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1548 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[1554 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1554 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[1560 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1560 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[1566 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1566 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[1572 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1572 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[1578 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1578 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[1584 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1584 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[1590 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1590 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[1596 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1596 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[1602 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1602 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[1608 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1608 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[1614 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1614 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[1620 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1620 + i]); } - p_res = &p_bnProcBufRes[78]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); + ((__m512i*)bnProcBufRes)[1626 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1626 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m512i*)bnProcBufRes)[1632 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1632 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[1638 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1638 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[1644 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1644 + i]); } - p_res = &p_bnProcBufRes[102]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]); + ((__m512i*)bnProcBufRes)[1650 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1650 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m512i*)bnProcBufRes)[1656 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1656 + i]); } - p_res = &p_bnProcBufRes[114]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]); + ((__m512i*)bnProcBufRes)[1662 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1662 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m512i*)bnProcBufRes)[1668 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1668 + i]); } - p_res = &p_bnProcBufRes[126]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); + ((__m512i*)bnProcBufRes)[1674 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1674 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m512i*)bnProcBufRes)[1680 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1680 + i]); } - p_res = &p_bnProcBufRes[138]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[138 + i]); + ((__m512i*)bnProcBufRes)[1686 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1686 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m512i*)bnProcBufRes)[1692 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1692 + i]); } - p_res = &p_bnProcBufRes[150]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]); + ((__m512i*)bnProcBufRes)[1698 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1698 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m512i*)bnProcBufRes)[1704 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1704 + i]); } - p_res = &p_bnProcBufRes[162]; - p_llrRes = (__m512i*) &llrRes [25344]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]); + ((__m512i*)bnProcBufRes)[1710 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[396 + i ], ((__m512i*) bnProcBuf)[1710 + i]); } // Process group with 29 CNs // Process group with 30 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [109824]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [109824]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[1716 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1716 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[1722 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1722 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[1728 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1728 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[1734 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1734 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[1740 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1740 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[1746 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1746 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[1752 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1752 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[1758 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1758 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[1764 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1764 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[1770 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1770 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[1776 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1776 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[1782 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1782 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[1788 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1788 + i]); } - p_res = &p_bnProcBufRes[78]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); + ((__m512i*)bnProcBufRes)[1794 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1794 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m512i*)bnProcBufRes)[1800 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1800 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[1806 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1806 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[1812 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1812 + i]); } - p_res = &p_bnProcBufRes[102]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]); + ((__m512i*)bnProcBufRes)[1818 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1818 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m512i*)bnProcBufRes)[1824 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1824 + i]); } - p_res = &p_bnProcBufRes[114]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]); + ((__m512i*)bnProcBufRes)[1830 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1830 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m512i*)bnProcBufRes)[1836 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1836 + i]); } - p_res = &p_bnProcBufRes[126]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); + ((__m512i*)bnProcBufRes)[1842 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1842 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m512i*)bnProcBufRes)[1848 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1848 + i]); } - p_res = &p_bnProcBufRes[138]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[138 + i]); + ((__m512i*)bnProcBufRes)[1854 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1854 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m512i*)bnProcBufRes)[1860 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1860 + i]); } - p_res = &p_bnProcBufRes[150]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]); + ((__m512i*)bnProcBufRes)[1866 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1866 + i]); } - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); + ((__m512i*)bnProcBufRes)[1872 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1872 + i]); } - p_res = &p_bnProcBufRes[162]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]); + ((__m512i*)bnProcBufRes)[1878 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1878 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m512i*)bnProcBufRes)[1884 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1884 + i]); } - p_res = &p_bnProcBufRes[174]; - p_llrRes = (__m512i*) &llrRes [25728]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[174 + i]); + ((__m512i*)bnProcBufRes)[1890 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[402 + i ], ((__m512i*) bnProcBuf)[1890 + i]); } } diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h index bb97cb33475f4d7d4e4466f050fd1c1d4dc0004c..7e005a6defd78051c5e2da6bbb4d4743e14b3d54 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h @@ -1,128 +1,74 @@ static inline void nrLDPC_bnProc_BG1_R23_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m512i* p_bnProcBuf; - __m512i* p_bnProcBufRes; - __m512i* p_llrRes; - __m512i* p_res; uint32_t M, i; // Process group with 2 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [3456]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [3456]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [3456]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[54 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[54 + i ], ((__m512i*) bnProcBuf)[54 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [3456]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[60 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[54 + i ], ((__m512i*) bnProcBuf)[60 + i]); } // Process group with 3 CNs M = (5*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [4224]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [4224]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [3840]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[66 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[60 + i ], ((__m512i*) bnProcBuf)[66 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [3840]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[96 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[60 + i ], ((__m512i*) bnProcBuf)[96 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [3840]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[126 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[60 + i ], ((__m512i*) bnProcBuf)[126 + i]); } // Process group with 4 CNs M = (3*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [9984]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [9984]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [5760]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[156 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[90 + i ], ((__m512i*) bnProcBuf)[156 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [5760]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[174 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[90 + i ], ((__m512i*) bnProcBuf)[174 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [5760]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[192 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[90 + i ], ((__m512i*) bnProcBuf)[192 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [5760]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[210 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[90 + i ], ((__m512i*) bnProcBuf)[210 + i]); } // Process group with 5 CNs M = (7*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [14592]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [14592]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[228 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[108 + i ], ((__m512i*) bnProcBuf)[228 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[270 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[108 + i ], ((__m512i*) bnProcBuf)[270 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m512i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m512i*)bnProcBufRes)[312 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[108 + i ], ((__m512i*) bnProcBuf)[312 + i]); } - p_res = &p_bnProcBufRes[126]; - p_llrRes = (__m512i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); + ((__m512i*)bnProcBufRes)[354 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[108 + i ], ((__m512i*) bnProcBuf)[354 + i]); } - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m512i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); + ((__m512i*)bnProcBufRes)[396 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[108 + i ], ((__m512i*) bnProcBuf)[396 + i]); } // Process group with 6 CNs M = (8*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [28032]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [28032]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[438 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[438 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[486 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[486 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[534 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[534 + i]); } - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); + ((__m512i*)bnProcBufRes)[582 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[582 + i]); } - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); + ((__m512i*)bnProcBufRes)[630 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[630 + i]); } - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); + ((__m512i*)bnProcBufRes)[678 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[678 + i]); } // Process group with 7 CNs // Process group with 8 CNs @@ -130,126 +76,76 @@ static inline void nrLDPC_bnProc_BG1_R23_AVX512(int8_t* bnProcBuf,int8_t* bnProc // Process group with 10 CNs // Process group with 11 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [46464]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [46464]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[726 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[726 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[732 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[732 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[738 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[738 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[744 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[744 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[750 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[750 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[756 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[756 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[762 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[762 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[768 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[768 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[774 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[774 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[780 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[780 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [12672]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[786 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[198 + i ], ((__m512i*) bnProcBuf)[786 + i]); } // Process group with 12 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [50688]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [50688]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[792 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[792 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[798 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[798 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[804 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[804 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[810 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[810 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[816 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[816 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[822 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[822 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[828 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[828 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[834 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[834 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[840 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[840 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[846 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[846 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[852 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[852 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [13056]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[858 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[204 + i ], ((__m512i*) bnProcBuf)[858 + i]); } // Process group with 13 CNs // Process group with 14 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h index 96006f76cfe22515cfca6b7a51369a3b5a71fbc4..a8074682f83ecdb4a327cd0ae745720d9e056348 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h @@ -1,94 +1,54 @@ static inline void nrLDPC_bnProc_BG1_R89_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m512i* p_bnProcBuf; - __m512i* p_bnProcBufRes; - __m512i* p_llrRes; - __m512i* p_res; uint32_t M, i; // Process group with 2 CNs M = (3*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [384]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [384]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [384]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[6 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[6 + i ], ((__m512i*) bnProcBuf)[6 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [384]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[24 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[6 + i ], ((__m512i*) bnProcBuf)[24 + i]); } // Process group with 3 CNs M = (21*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [2688]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [2688]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [1536]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[42 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[24 + i ], ((__m512i*) bnProcBuf)[42 + i]); } - p_res = &p_bnProcBufRes[126]; - p_llrRes = (__m512i*) &llrRes [1536]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); + ((__m512i*)bnProcBufRes)[168 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[24 + i ], ((__m512i*) bnProcBuf)[168 + i]); } - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m512i*) &llrRes [1536]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); + ((__m512i*)bnProcBufRes)[294 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[24 + i ], ((__m512i*) bnProcBuf)[294 + i]); } // Process group with 4 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [26880]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [26880]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[420 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[420 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[426 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[426 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[432 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[432 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [9600]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[438 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[150 + i ], ((__m512i*) bnProcBuf)[438 + i]); } // Process group with 5 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [28416]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [28416]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[444 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[156 + i ], ((__m512i*) bnProcBuf)[444 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[450 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[156 + i ], ((__m512i*) bnProcBuf)[450 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[456 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[156 + i ], ((__m512i*) bnProcBuf)[456 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[462 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[156 + i ], ((__m512i*) bnProcBuf)[462 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [9984]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[468 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[156 + i ], ((__m512i*) bnProcBuf)[468 + i]); } // Process group with 6 CNs // Process group with 7 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h index b3a701584b204ca44c254acfef6999aea56d89ef..2f2ea4ba493d9922dd14f624ed6d5b636d9e0c57 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h @@ -1,195 +1,115 @@ #include <stdint.h> #include <immintrin.h> void nrLDPC_bnProc_BG2_R13_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m512i* p_bnProcBuf; - __m512i* p_bnProcBufRes; - __m512i* p_llrRes; - __m512i* p_res; uint32_t M, i; // Process group with 2 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [6912]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [6912]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[108 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[108 + i ], ((__m512i*) bnProcBuf)[108 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [6912]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[114 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[108 + i ], ((__m512i*) bnProcBuf)[114 + i]); } // Process group with 3 CNs // Process group with 4 CNs M = (2*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [7680]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [7680]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [7296]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[120 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[114 + i ], ((__m512i*) bnProcBuf)[120 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [7296]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[132 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[114 + i ], ((__m512i*) bnProcBuf)[132 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [7296]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[144 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[114 + i ], ((__m512i*) bnProcBuf)[144 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [7296]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[156 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[114 + i ], ((__m512i*) bnProcBuf)[156 + i]); } // Process group with 5 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [10752]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [10752]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[168 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[126 + i ], ((__m512i*) bnProcBuf)[168 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[174 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[126 + i ], ((__m512i*) bnProcBuf)[174 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[180 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[126 + i ], ((__m512i*) bnProcBuf)[180 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[186 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[126 + i ], ((__m512i*) bnProcBuf)[186 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [8064]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[192 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[126 + i ], ((__m512i*) bnProcBuf)[192 + i]); } // Process group with 6 CNs M = (5*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [12672]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [12672]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[198 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[132 + i ], ((__m512i*) bnProcBuf)[198 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[228 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[132 + i ], ((__m512i*) bnProcBuf)[228 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[258 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[132 + i ], ((__m512i*) bnProcBuf)[258 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[288 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[132 + i ], ((__m512i*) bnProcBuf)[288 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m512i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m512i*)bnProcBufRes)[318 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[132 + i ], ((__m512i*) bnProcBuf)[318 + i]); } - p_res = &p_bnProcBufRes[150]; - p_llrRes = (__m512i*) &llrRes [8448]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]); + ((__m512i*)bnProcBufRes)[348 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[132 + i ], ((__m512i*) bnProcBuf)[348 + i]); } // Process group with 7 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [24192]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [24192]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[378 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[162 + i ], ((__m512i*) bnProcBuf)[378 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[384 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[162 + i ], ((__m512i*) bnProcBuf)[384 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[390 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[162 + i ], ((__m512i*) bnProcBuf)[390 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[396 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[162 + i ], ((__m512i*) bnProcBuf)[396 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[402 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[162 + i ], ((__m512i*) bnProcBuf)[402 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[408 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[162 + i ], ((__m512i*) bnProcBuf)[408 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [10368]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[414 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[162 + i ], ((__m512i*) bnProcBuf)[414 + i]); } // Process group with 8 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [26880]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [26880]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[420 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[168 + i ], ((__m512i*) bnProcBuf)[420 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[426 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[168 + i ], ((__m512i*) bnProcBuf)[426 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[432 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[168 + i ], ((__m512i*) bnProcBuf)[432 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[438 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[168 + i ], ((__m512i*) bnProcBuf)[438 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[444 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[168 + i ], ((__m512i*) bnProcBuf)[444 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[450 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[168 + i ], ((__m512i*) bnProcBuf)[450 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[456 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[168 + i ], ((__m512i*) bnProcBuf)[456 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [10752]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[462 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[168 + i ], ((__m512i*) bnProcBuf)[462 + i]); } // Process group with 9 CNs // Process group with 10 CNs @@ -197,231 +117,139 @@ void nrLDPC_bnProc_BG2_R13_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_ // Process group with 12 CNs // Process group with 13 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [29952]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [29952]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[468 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[468 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[474 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[474 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[480 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[480 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[486 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[486 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[492 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[492 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[498 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[498 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[504 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[504 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[510 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[510 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[516 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[516 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[522 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[522 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[528 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[528 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[534 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[534 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [11136]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[540 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[174 + i ], ((__m512i*) bnProcBuf)[540 + i]); } // Process group with 14 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [34944]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [34944]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[546 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[546 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[552 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[552 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[558 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[558 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[564 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[564 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[570 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[570 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[576 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[576 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[582 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[582 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[588 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[588 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[594 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[594 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[600 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[600 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[606 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[606 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[612 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[612 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[618 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[618 + i]); } - p_res = &p_bnProcBufRes[78]; - p_llrRes = (__m512i*) &llrRes [11520]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); + ((__m512i*)bnProcBufRes)[624 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[180 + i ], ((__m512i*) bnProcBuf)[624 + i]); } // Process group with 15 CNs // Process group with 16 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [40320]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [40320]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[630 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[630 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[636 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[636 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[642 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[642 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[648 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[648 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[654 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[654 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[660 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[660 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[666 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[666 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[672 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[672 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[678 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[678 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[684 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[684 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[690 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[690 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[696 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[696 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[702 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[702 + i]); } - p_res = &p_bnProcBufRes[78]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); + ((__m512i*)bnProcBufRes)[708 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[708 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m512i*)bnProcBufRes)[714 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[714 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [11904]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[720 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[186 + i ], ((__m512i*) bnProcBuf)[720 + i]); } // Process group with 17 CNs // Process group with 18 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h index 4c56fca2f3d8c5234b510dba5dd0fd6886606d62..bb33d9316c5db5735c1a3382790f971e4bdccd23 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h @@ -1,555 +1,331 @@ #include <stdint.h> #include <immintrin.h> void nrLDPC_bnProc_BG2_R15_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m512i* p_bnProcBuf; - __m512i* p_bnProcBufRes; - __m512i* p_llrRes; - __m512i* p_res; uint32_t M, i; // Process group with 2 CNs // Process group with 3 CNs // Process group with 4 CNs // Process group with 5 CNs M = (2*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [14592]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [14592]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[228 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[228 + i ], ((__m512i*) bnProcBuf)[228 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[240 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[228 + i ], ((__m512i*) bnProcBuf)[240 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[252 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[228 + i ], ((__m512i*) bnProcBuf)[252 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[264 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[228 + i ], ((__m512i*) bnProcBuf)[264 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [14592]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[276 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[228 + i ], ((__m512i*) bnProcBuf)[276 + i]); } // Process group with 6 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [18432]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [18432]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[288 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[240 + i ], ((__m512i*) bnProcBuf)[288 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[294 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[240 + i ], ((__m512i*) bnProcBuf)[294 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[300 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[240 + i ], ((__m512i*) bnProcBuf)[300 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[306 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[240 + i ], ((__m512i*) bnProcBuf)[306 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[312 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[240 + i ], ((__m512i*) bnProcBuf)[312 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [15360]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[318 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[240 + i ], ((__m512i*) bnProcBuf)[318 + i]); } // Process group with 7 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [20736]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [20736]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[324 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[246 + i ], ((__m512i*) bnProcBuf)[324 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[330 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[246 + i ], ((__m512i*) bnProcBuf)[330 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[336 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[246 + i ], ((__m512i*) bnProcBuf)[336 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[342 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[246 + i ], ((__m512i*) bnProcBuf)[342 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[348 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[246 + i ], ((__m512i*) bnProcBuf)[348 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[354 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[246 + i ], ((__m512i*) bnProcBuf)[354 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [15744]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[360 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[246 + i ], ((__m512i*) bnProcBuf)[360 + i]); } // Process group with 8 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [23424]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [23424]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[366 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[366 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[372 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[372 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[378 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[378 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[384 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[384 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[390 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[390 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[396 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[396 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[402 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[402 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [16128]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[408 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[252 + i ], ((__m512i*) bnProcBuf)[408 + i]); } // Process group with 9 CNs M = (2*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [26496]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [26496]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[414 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[414 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[426 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[426 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[438 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[438 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[450 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[450 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[462 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[462 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[474 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[474 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[486 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[486 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m512i*)bnProcBufRes)[498 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[498 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [16512]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[510 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[258 + i ], ((__m512i*) bnProcBuf)[510 + i]); } // Process group with 10 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [33408]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [33408]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[522 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[522 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[528 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[528 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[534 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[534 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[540 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[540 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[546 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[546 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[552 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[552 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[558 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[558 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[564 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[564 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[570 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[570 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [17280]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[576 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[270 + i ], ((__m512i*) bnProcBuf)[576 + i]); } // Process group with 11 CNs // Process group with 12 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [37248]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [37248]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[582 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[582 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[588 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[588 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[594 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[594 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[600 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[600 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[606 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[606 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[612 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[612 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[618 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[618 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[624 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[624 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[630 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[630 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[636 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[636 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[642 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[642 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [17664]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[648 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[276 + i ], ((__m512i*) bnProcBuf)[648 + i]); } // Process group with 13 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [41856]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [41856]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[654 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[654 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[660 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[660 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[666 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[666 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[672 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[672 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[678 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[678 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[684 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[684 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[690 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[690 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[696 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[696 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[702 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[702 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[708 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[708 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[714 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[714 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[720 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[720 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [18048]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[726 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[282 + i ], ((__m512i*) bnProcBuf)[726 + i]); } // Process group with 14 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [46848]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [46848]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[732 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[732 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[738 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[738 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[744 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[744 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[750 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[750 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[756 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[756 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[762 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[762 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[768 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[768 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[774 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[774 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[780 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[780 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[786 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[786 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[792 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[792 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[798 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[798 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[804 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[804 + i]); } - p_res = &p_bnProcBufRes[78]; - p_llrRes = (__m512i*) &llrRes [18432]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); + ((__m512i*)bnProcBufRes)[810 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[288 + i ], ((__m512i*) bnProcBuf)[810 + i]); } // Process group with 15 CNs // Process group with 16 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [52224]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [52224]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[816 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[816 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[822 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[822 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[828 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[828 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[834 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[834 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[840 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[840 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[846 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[846 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[852 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[852 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[858 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[858 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[864 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[864 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[870 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[870 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[876 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[876 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[882 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[882 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[888 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[888 + i]); } - p_res = &p_bnProcBufRes[78]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); + ((__m512i*)bnProcBufRes)[894 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[894 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m512i*)bnProcBufRes)[900 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[900 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [18816]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[906 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[294 + i ], ((__m512i*) bnProcBuf)[906 + i]); } // Process group with 17 CNs // Process group with 18 CNs @@ -558,236 +334,142 @@ void nrLDPC_bnProc_BG2_R15_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_ // Process group with 21 CNs // Process group with 22 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [58368]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [58368]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[912 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[912 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[918 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[918 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[924 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[924 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[930 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[930 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[936 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[936 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[942 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[942 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[948 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[948 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[954 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[954 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[960 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[960 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[966 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[966 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[972 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[972 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[978 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[978 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[984 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[984 + i]); } - p_res = &p_bnProcBufRes[78]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); + ((__m512i*)bnProcBufRes)[990 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[990 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m512i*)bnProcBufRes)[996 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[996 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[1002 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[1002 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[1008 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[1008 + i]); } - p_res = &p_bnProcBufRes[102]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]); + ((__m512i*)bnProcBufRes)[1014 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[1014 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m512i*)bnProcBufRes)[1020 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[1020 + i]); } - p_res = &p_bnProcBufRes[114]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]); + ((__m512i*)bnProcBufRes)[1026 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[1026 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m512i*)bnProcBufRes)[1032 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[1032 + i]); } - p_res = &p_bnProcBufRes[126]; - p_llrRes = (__m512i*) &llrRes [19200]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); + ((__m512i*)bnProcBufRes)[1038 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[300 + i ], ((__m512i*) bnProcBuf)[1038 + i]); } // Process group with <23 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [66816]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [66816]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[1044 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1044 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[1050 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1050 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[1056 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1056 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[1062 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1062 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[1068 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1068 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[1074 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1074 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[1080 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1080 + i]); } - p_res = &p_bnProcBufRes[42]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); + ((__m512i*)bnProcBufRes)[1086 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1086 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[1092 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1092 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[1098 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1098 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[1104 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1104 + i]); } - p_res = &p_bnProcBufRes[66]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); + ((__m512i*)bnProcBufRes)[1110 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1110 + i]); } - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); + ((__m512i*)bnProcBufRes)[1116 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1116 + i]); } - p_res = &p_bnProcBufRes[78]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); + ((__m512i*)bnProcBufRes)[1122 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1122 + i]); } - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); + ((__m512i*)bnProcBufRes)[1128 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1128 + i]); } - p_res = &p_bnProcBufRes[90]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); + ((__m512i*)bnProcBufRes)[1134 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1134 + i]); } - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); + ((__m512i*)bnProcBufRes)[1140 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1140 + i]); } - p_res = &p_bnProcBufRes[102]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]); + ((__m512i*)bnProcBufRes)[1146 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1146 + i]); } - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); + ((__m512i*)bnProcBufRes)[1152 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1152 + i]); } - p_res = &p_bnProcBufRes[114]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]); + ((__m512i*)bnProcBufRes)[1158 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1158 + i]); } - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); + ((__m512i*)bnProcBufRes)[1164 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1164 + i]); } - p_res = &p_bnProcBufRes[126]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); + ((__m512i*)bnProcBufRes)[1170 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1170 + i]); } - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m512i*) &llrRes [19584]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); + ((__m512i*)bnProcBufRes)[1176 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[306 + i ], ((__m512i*) bnProcBuf)[1176 + i]); } // Process group with 24 CNs // Process group with 25 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R23_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R23_AVX512.h index 81d420ccfd040d0194e1eb65658fc86bf608c029..6ba0205623c9c0d9fb2d1619f97a3285a08de6db 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R23_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R23_AVX512.h @@ -1,130 +1,76 @@ #include <stdint.h> #include <immintrin.h> void nrLDPC_bnProc_BG2_R23_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m512i* p_bnProcBuf; - __m512i* p_bnProcBufRes; - __m512i* p_llrRes; - __m512i* p_res; uint32_t M, i; // Process group with 2 CNs M = (3*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [1152]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [1152]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [1152]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[18 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[18 + i ], ((__m512i*) bnProcBuf)[18 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [1152]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[36 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[18 + i ], ((__m512i*) bnProcBuf)[36 + i]); } // Process group with 3 CNs M = (5*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [3456]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [3456]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [2304]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[54 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[36 + i ], ((__m512i*) bnProcBuf)[54 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [2304]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[84 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[36 + i ], ((__m512i*) bnProcBuf)[84 + i]); } - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m512i*) &llrRes [2304]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); + ((__m512i*)bnProcBufRes)[114 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[36 + i ], ((__m512i*) bnProcBuf)[114 + i]); } // Process group with 4 CNs M = (3*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [9216]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [9216]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [4224]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[144 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[66 + i ], ((__m512i*) bnProcBuf)[144 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [4224]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[162 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[66 + i ], ((__m512i*) bnProcBuf)[162 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [4224]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[180 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[66 + i ], ((__m512i*) bnProcBuf)[180 + i]); } - p_res = &p_bnProcBufRes[54]; - p_llrRes = (__m512i*) &llrRes [4224]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); + ((__m512i*)bnProcBufRes)[198 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[66 + i ], ((__m512i*) bnProcBuf)[198 + i]); } // Process group with 5 CNs M = (2*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [13824]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [13824]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[216 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[84 + i ], ((__m512i*) bnProcBuf)[216 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[228 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[84 + i ], ((__m512i*) bnProcBuf)[228 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[240 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[84 + i ], ((__m512i*) bnProcBuf)[240 + i]); } - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m512i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); + ((__m512i*)bnProcBufRes)[252 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[84 + i ], ((__m512i*) bnProcBuf)[252 + i]); } - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m512i*) &llrRes [5376]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); + ((__m512i*)bnProcBufRes)[264 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[84 + i ], ((__m512i*) bnProcBuf)[264 + i]); } // Process group with 6 CNs M = (1*Z + 63)>>6; - p_bnProcBuf = (__m512i*) &bnProcBuf [17664]; - p_bnProcBufRes = (__m512i*) &bnProcBufRes [17664]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m512i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); + ((__m512i*)bnProcBufRes)[276 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[96 + i ], ((__m512i*) bnProcBuf)[276 + i]); } - p_res = &p_bnProcBufRes[6]; - p_llrRes = (__m512i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); + ((__m512i*)bnProcBufRes)[282 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[96 + i ], ((__m512i*) bnProcBuf)[282 + i]); } - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m512i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); + ((__m512i*)bnProcBufRes)[288 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[96 + i ], ((__m512i*) bnProcBuf)[288 + i]); } - p_res = &p_bnProcBufRes[18]; - p_llrRes = (__m512i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); + ((__m512i*)bnProcBufRes)[294 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[96 + i ], ((__m512i*) bnProcBuf)[294 + i]); } - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m512i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); + ((__m512i*)bnProcBufRes)[300 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[96 + i ], ((__m512i*) bnProcBuf)[300 + i]); } - p_res = &p_bnProcBufRes[30]; - p_llrRes = (__m512i*) &llrRes [6144]; for (i=0;i<M;i++) { - p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); + ((__m512i*)bnProcBufRes)[306 + i ] = _mm512_subs_epi8(((__m512i*)llrRes)[96 + i ], ((__m512i*) bnProcBuf)[306 + i]); } // Process group with 7 CNs // Process group with 8 CNs diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R23_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R23_AVX512.h index 902da267d9f611fe0a5a830a8047fe34d401a243..3ae281ec29256badfa928ccd8ce6b88a44fca61c 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R23_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R23_AVX512.h @@ -25,8 +25,8 @@ static inline void nrLDPC_bnProcPc_BG1_R23_AVX512(int8_t* bnProcBuf,int8_t* llrR p_llrProcBuf = (__m256i*) &llrProcBuf [3456]; p_llrRes = (__m512i*) &llrRes [3456]; for (int i=0,j=0;i<M;i++,j+=2) { - zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]); - zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]); + zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]); + zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]); zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]); zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0); zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R89_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R89_AVX512.h index 4731c417df7a969b3f533550589b3314e0d04400..6bd1ce1fa8a5caf58027fbf30a18a40e0f23f242 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R89_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R89_AVX512.h @@ -25,8 +25,8 @@ static inline void nrLDPC_bnProcPc_BG1_R89_AVX512(int8_t* bnProcBuf,int8_t* llrR p_llrProcBuf = (__m256i*) &llrProcBuf [384]; p_llrRes = (__m512i*) &llrRes [384]; for (int i=0,j=0;i<M;i++,j+=2) { - zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]); - zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]); + zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]); + zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]); zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]); zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0); zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R13_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R13_AVX512.h index 67fc3eea3d3edb3a7aabbe62e6e42f06340c045c..3b80edd69fe802a5761ca6b062147dc9a6111e4b 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R13_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R13_AVX512.h @@ -25,8 +25,8 @@ static inline void nrLDPC_bnProcPc_BG2_R13_AVX512(int8_t* bnProcBuf,int8_t* llrR p_llrProcBuf = (__m256i*) &llrProcBuf [6912]; p_llrRes = (__m512i*) &llrRes [6912]; for (int i=0,j=0;i<M;i++,j+=2) { - zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]); - zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]); + zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]); + zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]); zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]); zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0); zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]); diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R23_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R23_AVX512.h index 6330988dd3a1f306137541710367bd604541651b..ed85fe5dc15653bc7470d38d28f9770e7aea0f27 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R23_AVX512.h +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R23_AVX512.h @@ -25,8 +25,8 @@ static inline void nrLDPC_bnProcPc_BG2_R23_AVX512(int8_t* bnProcBuf,int8_t* llrR p_llrProcBuf = (__m256i*) &llrProcBuf [1152]; p_llrRes = (__m512i*) &llrRes [1152]; for (int i=0,j=0;i<M;i++,j+=2) { - zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]); - zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]); + zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]); + zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]); zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]); zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0); zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);