diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
index feb0fa7089338aa0ebfa855ac9318ed7d9b68604..fd015580495e5c1832ff666653ad45e189d5b88a 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.c
@@ -71,7 +71,7 @@ fprintf(fd,  "// Process group with 1 CNs \n");
 
 
  // Process group with 2 CNs
-
+/*
     if (lut_numBnInBnGroups[0] > 0)
     {
         // If elements in group move to next address
@@ -123,7 +123,7 @@ fprintf(fd,  "// Process group with 1 CNs \n");
     }
       // =====================================================================
     // Process group with 2 CNs
-
+*/
 
 fprintf(fd,  "// Process group with 2 CNs \n");
 
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.o
new file mode 100644
index 0000000000000000000000000000000000000000..8caf4b85a31f5704239e0d6aa2071be5529f93a2
Binary files /dev/null and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG1_avx2.o differ
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.o
new file mode 100644
index 0000000000000000000000000000000000000000..2f6a08ea358ebdad5d329ffe676e4f502c3abf58
Binary files /dev/null and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProcPc_gen_BG2_avx2.o differ
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.o
new file mode 100644
index 0000000000000000000000000000000000000000..d40fd0a3af91fdbdb82afdac3ac2f5500eec1dc6
Binary files /dev/null and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG1_avx2.o differ
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.o
new file mode 100644
index 0000000000000000000000000000000000000000..248c7244cf105f023b1896cd4308aee4e0e8ce83
Binary files /dev/null and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_BG2_avx2.o differ
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_avx2 b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_avx2
index d2fd3730144602aa16b4728e7acbcaff53d6e823..8e7d6324bd6cb8fa9fbb81d57e9d9c56413530da 100755
Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_avx2 and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/bnProc_gen_avx2 differ
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/main.o b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/main.o
new file mode 100644
index 0000000000000000000000000000000000000000..c9ea8704ae2d57a0c36127605f696ab9b1febb99
Binary files /dev/null and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/main.o differ
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..438d0b4b2111a33261ab02399920965505229458
Binary files /dev/null and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz differ
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c
new file mode 100644
index 0000000000000000000000000000000000000000..09bd99521639960ad989eb46a832cf33ee5c96af
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG1_avx512.c
@@ -0,0 +1,1768 @@
+
+
+
+
+#include <stdint.h>
+#include <immintrin.h>
+#include "../../nrLDPCdecoder_defs.h"
+#include "../../nrLDPC_types.h"
+
+
+void nrLDPC_bnProcPc_BG1_generator_AVX512(int R)
+{
+  const char *ratestr[3]={"13","23","89"};
+
+  if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
+
+
+
+
+  char fname[128]; // the formatted path below is 60 characters long, which overflowed the former 50-byte buffer
+  snprintf(fname,sizeof(fname),"../ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R%s_AVX512.h",ratestr[R]);
+  FILE *fd=fopen(fname,"w");
+  if (fd == NULL) {printf("Cannot create %s\n",fname);abort();}
+
+
+
+  fprintf(fd,"static inline void nrLDPC_bnProcPc_BG1_R%s_AVX512(int8_t* bnProcBuf,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z ) {\n",ratestr[R]);
+    const uint8_t*  lut_numBnInBnGroups;
+    const uint32_t* lut_startAddrBnGroups;
+    const uint16_t* lut_startAddrBnGroupsLlr;
+    if (R==0) {
+
+
+      lut_numBnInBnGroups =  lut_numBnInBnGroups_BG1_R13;
+      lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R13;
+      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R13;
+
+    }
+    else if (R==1){
+
+      lut_numBnInBnGroups =  lut_numBnInBnGroups_BG1_R23;
+      lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R23;
+      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R23;
+    }
+    else if (R==2) {
+
+      lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R89;
+      lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R89;
+      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R89;
+    }
+  else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
+        // Number of BNs in Groups
+//    uint32_t M;
+    //uint32_t M32rem;
+    //uint32_t i,j;
+    uint32_t k;
+    // Offset to each bit within a group in terms of 32 Byte
+    uint32_t cnOffsetInGroup;
+    uint8_t idxBnGroup = 0;
+
+    fprintf(fd,"   __m512i zmm0, zmm1, zmmRes0, zmmRes1;  \n");
+
+
+    fprintf(fd,"        __m256i* p_bnProcBuf; \n");
+    fprintf(fd,"        __m256i* p_llrProcBuf;\n");
+    fprintf(fd,"        __m512i* p_llrRes; \n");
+    fprintf(fd,"         uint32_t M ;\n");
+
+
+fprintf(fd,  "// Process group with 1 CNs \n");
+
+
+ // Process group with 1 CN
+
+    if (lut_numBnInBnGroups[0] > 0)
+    {
+        // Group 0 is addressed with idxBnGroup as initialised (0), so unlike the later
+        // groups it does not advance idxBnGroup before the LUT lookups
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[0] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[0]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (was zmm0/zmm1, which left zmmRes0/zmmRes1 unset)
+        fprintf(fd,"            zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n");
+
+            // Remaining CNs: none for this group, so this loop body is never emitted
+        for (k=1; k<1; k++)
+        {
+        fprintf(fd,"            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+
+        fprintf(fd,"}\n");
+    }
+      // =====================================================================
+    // Process group with 2 CNs
+
+
+fprintf(fd,  "// Process group with 2 CNs \n");
+
+ // Process group with 2 CNs
+
+    if (lut_numBnInBnGroups[1] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[1] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (was zmm0/zmm1, which dropped the first CN message)
+        fprintf(fd,"            zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n");
+
+            // Unrolled at generation time: accumulate the second CN of the group
+        for (k=1; k<2; k++)
+        {
+        fprintf(fd,"            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+
+        fprintf(fd,"}\n");
+    }
+
+    // =====================================================================
+    // Process group with 3 CNs
+
+
+fprintf(fd,  "// Process group with 3 CNs \n");
+
+ // Process group with 3 CNs
+
+    if (lut_numBnInBnGroups[2] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[2] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (2 x 32 int8 widened to int16)
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Unrolled at generation time: accumulate the remaining CNs of the group
+        for (k=1; k<3; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+            }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // packs interleaves zmmRes0/zmmRes1 per 128-bit lane; regroup even then odd 64-bit words across all 512 bits
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 4 CNs
+
+fprintf(fd,  "// Process group with 4 CNs \n");
+
+ // Process group with 4 CNs
+
+    if (lut_numBnInBnGroups[3] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[3] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (2 x 32 int8 widened to int16)
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Unrolled at generation time: accumulate the remaining CNs of the group
+        for (k=1; k<4; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 5 CNs
+
+fprintf(fd,  "// Process group with 5 CNs \n");
+
+ // Process group with 5 CNs
+
+    if (lut_numBnInBnGroups[4] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[4] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (2 x 32 int8 widened to int16)
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Unrolled at generation time: accumulate the remaining CNs of the group
+        for (k=1; k<5; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+   // =====================================================================
+    // Process group with 6 CNs
+
+fprintf(fd,  "// Process group with 6 CNs \n");
+
+ // Process group with 6 CNs
+
+    if (lut_numBnInBnGroups[5] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[5] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (2 x 32 int8 widened to int16)
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Unrolled at generation time: accumulate the remaining CNs of the group
+        for (k=1; k<6; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 7 CNs
+
+fprintf(fd,  "// Process group with 7 CNs \n");
+
+ // Process group with 7 CNs
+
+    if (lut_numBnInBnGroups[6] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[6] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (2 x 32 int8 widened to int16)
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Unrolled at generation time: accumulate the remaining CNs of the group
+        for (k=1; k<7; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+            // Stored order: the four zmmRes0 words followed by the four zmmRes1 words.
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 8 CNs
+
+fprintf(fd,  "// Process group with 8 CNs \n");
+
+ // Process group with 8 CNs
+
+    if (lut_numBnInBnGroups[7] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[7] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (2 x 32 int8 widened to int16)
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Unrolled at generation time: accumulate the remaining CNs of the group
+        for (k=1; k<8; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+            // Stored order: the four zmmRes0 words followed by the four zmmRes1 words.
+
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+   // =====================================================================
+    // Process group with 9 CNs
+
+fprintf(fd,  "// Process group with 9 CNs \n");
+
+ // Process group with 9 CNs
+
+    if (lut_numBnInBnGroups[8] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[8] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (2 x 32 int8 widened to int16)
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Unrolled at generation time: accumulate the remaining CNs of the group
+        for (k=1; k<9; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+            // Stored order: the four zmmRes0 words followed by the four zmmRes1 words.
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 10 CNs
+
+fprintf(fd,  "// Process group with 10 CNs \n");
+
+ // Process group with 10 CNs
+
+    if (lut_numBnInBnGroups[9] > 0)
+    {
+        // Non-empty group: advance to this group's entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Emit M = ceil(numBn*Z/64), the trip count of the generated loop (64 int8 values per pass)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[9] );
+
+        // Stride between consecutive CNs of this group, in __m256i (32-byte) units
+        cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>5;
+
+        // Emit the pointers to the start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Emit the loop over BNs: i indexes 64-byte results, j indexes 32-byte inputs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // Seed the accumulators with the first CN (2 x 32 int8 widened to int16)
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Unrolled at generation time: accumulate the remaining CNs of the group
+        for (k=1; k<10; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add the values read from llrProcBuf (saturating int16 add)
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Saturate-pack the int16 sums back to int8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // _mm512_packs_epi16 works per 128-bit lane: 64-bit words come out as [Res0 Res1 Res0 Res1 ...].
+            // permutex_epi64 only shuffles inside a 256-bit half, so gather even/odd words with permutexvar.
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+
+fprintf(fd,  "// Process group with 11 CNs \n");
+
+
+
+ // Process group with 11 CNs
+
+    if (lut_numBnInBnGroups[10] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[10] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"            zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<11; k++)
+        {
+        fprintf(fd,"            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+      // =====================================================================
+    // Process group with 12 CNs
+
+
+fprintf(fd,  "// Process group with 12 CNs \n");
+
+ // Process group with 12 CNs
+
+    if (lut_numBnInBnGroups[11] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[11] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"            zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<12; k++)
+        {
+        fprintf(fd,"            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+    // =====================================================================
+    // Process group with 13 CNs
+
+
+fprintf(fd,  "// Process group with 13 CNs \n");
+
+ // Process group with 13 CNs
+
+    if (lut_numBnInBnGroups[12] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[12] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<13; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+            }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 14 CNs
+
+fprintf(fd,  "// Process group with 14 CNs \n");
+
+ // Process group with 14 CNs
+
+    if (lut_numBnInBnGroups[13] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[13] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<14; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 15 CNs
+
+fprintf(fd,  "// Process group with 15 CNs \n");
+
+ // Process group with 15 CNs
+
+    if (lut_numBnInBnGroups[14] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[14] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<15; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+         fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+   // =====================================================================
+    // Process group with 16 CNs
+
+fprintf(fd,  "// Process group with 16 CNs \n");
+
+ // Process group with 16 CNs
+
+    if (lut_numBnInBnGroups[15] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[15] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<16; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 17 CNs
+
+fprintf(fd,  "// Process group with 17 CNs \n");
+
+ // Process group with 17 CNs
+
+    if (lut_numBnInBnGroups[16] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[16] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<17; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 18 CNs
+
+fprintf(fd,  "// Process group with 18 CNs \n");
+
+ // Process group with 18 CNs
+
+    if (lut_numBnInBnGroups[17] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[17] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<18; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+   // =====================================================================
+    // Process group with 19 CNs
+
+fprintf(fd,  "// Process group with 19 CNs \n");
+
+ // Process group with 19 CNs
+
+    if (lut_numBnInBnGroups[18] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[18] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<19; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 20 CNs
+
+fprintf(fd,  "// Process group with 20 CNs \n");
+
+ // Process group with 20 CNs
+
+    if (lut_numBnInBnGroups[19] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[19] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<20; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+
+
+    // =====================================================================
+
+fprintf(fd,  "// Process group with 21 CNs \n");
+
+
+
+ // Process group with 21 CNs
+
+    if (lut_numBnInBnGroups[20] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[20] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"            zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<21; k++)
+        {
+        fprintf(fd,"            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+      // =====================================================================
+    // Process group with 22 CNs
+
+
+fprintf(fd,  "// Process group with 22 CNs \n");
+
+ // Process group with 22 CNs
+
+    if (lut_numBnInBnGroups[21] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[21] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: two vectors of 32 LLRs each, widened from int8 to int16
+        fprintf(fd,"            zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<22; k++)
+        {
+        fprintf(fd,"            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+         fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+    // =====================================================================
+    // Process group with 23 CNs
+
+
+fprintf(fd,  "// Process group with <23 CNs \n");
+
+ // Process group with 23 CNs
+
+    if (lut_numBnInBnGroups[22] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[22] );;
+
+        // Set the offset to each CN within a group in terms of 32 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of this BN group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<23; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+            }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 24 CNs
+
+fprintf(fd,  "// Process group with 24 CNs \n");
+
+ // Process group with 24 CNs
+
+    if (lut_numBnInBnGroups[23] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[23] );;
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<24; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 25 CNs
+
+fprintf(fd,  "// Process group with 25 CNs \n");
+
+ // Process group with 25 CNs
+
+    if (lut_numBnInBnGroups[24] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[24] );;
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<25; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+   // =====================================================================
+    // Process group with 26 CNs
+
+fprintf(fd,  "// Process group with 26 CNs \n");
+
+ // Process group with 26 CNs
+
+    if (lut_numBnInBnGroups[25] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[25] );;
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<26; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 27 CNs
+
+fprintf(fd,  "// Process group with 27 CNs \n");
+
+ // Process group with 27 CNs
+
+    if (lut_numBnInBnGroups[26] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[26] );;
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<27; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 28 CNs
+
+fprintf(fd,  "// Process group with 28 CNs \n");
+
+ // Process group with 28 CNs
+
+    if (lut_numBnInBnGroups[27] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[27] );;
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<28; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+   // =====================================================================
+    // Process group with 29 CNs
+
+fprintf(fd,  "// Process group with 29 CNs \n");
+
+ // Process group with 29 CNs
+
+    if (lut_numBnInBnGroups[28] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[28] );;
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<29; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 30 CNs
+
+fprintf(fd,  "// Process group with 30 CNs \n");
+
+ // Process group with 30 CNs
+
+    if (lut_numBnInBnGroups[29] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[29] );;
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<30; k++)
+        {
+        fprintf(fd,"        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);\n");
+
+        fprintf(fd,"        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);\n");
+            // zmm0     = [zmmRes1[255:256] zmmRes0[255:256] zmmRes1[127:0] zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256] zmmRes1[127:0] zmmRes0[255:256] zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+    fprintf(fd,"}\n");
+  fclose(fd);
+}//end of the function  nrLDPC_bnProcPc_BG1
+
+
+
+
+
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c
new file mode 100644
index 0000000000000000000000000000000000000000..a025ae8efb3b1b653720ae741c66e2d8f1ce38a6
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProcPc_gen_BG2_avx512.c
@@ -0,0 +1,1770 @@
+
+
+
+#include <stdint.h>
+#include <immintrin.h>
+#include "../../nrLDPCdecoder_defs.h"
+#include "../../nrLDPC_types.h"
+
+
+void nrLDPC_bnProcPc_BG2_generator_AVX512(int R)
+{
+  const char *ratestr[3]={"15","13","23"};
+
+  if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();}
+
+
+ // system("mkdir -p ../ldpc_gen_files");
+
+  char fname[50];
+  sprintf(fname,"../ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R%s_AVX512.h",ratestr[R]);
+  FILE *fd=fopen(fname,"w");
+  if (fd == NULL) {printf("Cannot create \n");abort();}
+
+//  fprintf(fd,"#include <stdint.h>\n");
+  //fprintf(fd,"#include <immintrin.h>\n");
+
+  fprintf(fd,"static inline void nrLDPC_bnProcPc_BG2_R%s_AVX512(int8_t* bnProcBuf,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z ) {\n",ratestr[R]);
+    const uint8_t*  lut_numBnInBnGroups;
+    const uint32_t* lut_startAddrBnGroups;
+    const uint16_t* lut_startAddrBnGroupsLlr;
+    if (R==0) {
+
+
+      lut_numBnInBnGroups =  lut_numBnInBnGroups_BG2_R15;
+      lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R15;
+      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R15;
+
+    }
+    else if (R==1){
+
+      lut_numBnInBnGroups =  lut_numBnInBnGroups_BG2_R13;
+      lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R13;
+      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R13;
+    }
+    else if (R==2) {
+
+      lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R23;
+      lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R23;
+      lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R23;
+    }
+  else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();}
+
+
+    uint32_t k;
+    // Offset to each bit within a group in terms of 32 Byte
+    uint32_t cnOffsetInGroup;
+    uint8_t idxBnGroup = 0;
+
+    fprintf(fd,"   __m512i zmm0,zmm1,zmmRes0,zmmRes1;  \n");
+
+
+    fprintf(fd,"        __m256i* p_bnProcBuf; \n");
+    fprintf(fd,"        __m256i* p_llrProcBuf;\n");
+    fprintf(fd,"        __m512i* p_llrRes; \n");
+    fprintf(fd,"         uint32_t M ;\n");
+
+
+fprintf(fd,  "// Process group with 1 CNs \n");
+
+
+ // Process group with 1 CN
+
+    if (lut_numBnInBnGroups[0] > 0)
+    {
+        // If elements in group move to next address
+       // idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[0] );
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[0]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+
+        // Loop over BNs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"           zmm1 = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+
+            // Loop over CNs
+        /*for (k=1; k<1; k++)
+        {
+        fprintf(fd,"           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+*/
+            // Add LLR from receiver input
+        fprintf(fd,"           zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[j+1]);\n");
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+
+        fprintf(fd,"}\n");
+    }
+      // =====================================================================
+    // Process group with 2 CNs
+
+
+fprintf(fd,  "// Process group with 2 CNs \n");
+
+ // Process group with 2 CNs
+
+    if (lut_numBnInBnGroups[1] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[1] );
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+
+        // Loop over BNs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<2; k++)
+        {
+        fprintf(fd,"           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"           zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+
+        fprintf(fd,"}\n");
+    }
+
+    // =====================================================================
+    // Process group with 3 CNs
+
+
+fprintf(fd,  "// Process group with 3 CNs \n");
+
+ // Process group with 3 CNs
+
+    if (lut_numBnInBnGroups[2] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[2] );
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<3; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+            }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 4 CNs
+
+fprintf(fd,  "// Process group with 4 CNs \n");
+
+ // Process group with 4 CNs
+
+    if (lut_numBnInBnGroups[3] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 32 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[3] );
+
+        // Set the offset to each CN within a group in terms of 16 Byte
+        cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<4; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+    // =====================================================================
+    // Process group with 5 CNs: emit the loop that adds, for every BN of the
+    // group, its 5 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 5 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[4] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[4] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<5; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 6 CNs: emit the loop that adds, for every BN of the
+    // group, its 6 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 6 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[5] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[5] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<6; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+    // =====================================================================
+    // Process group with 7 CNs: emit the loop that adds, for every BN of the
+    // group, its 7 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 7 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[6] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[6] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<7; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+            // (_mm512_set_epi64 lists the source indices from qword 7 down to qword 0)
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+    // =====================================================================
+    // Process group with 8 CNs: emit the loop that adds, for every BN of the
+    // group, its 8 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 8 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[7] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[7] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<8; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+            // (_mm512_set_epi64 lists the source indices from qword 7 down to qword 0)
+
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+    // =====================================================================
+    // Process group with 9 CNs: emit the loop that adds, for every BN of the
+    // group, its 9 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 9 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[8] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[8] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<9; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+            // (_mm512_set_epi64 lists the source indices from qword 7 down to qword 0)
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+    // =====================================================================
+    // Process group with 10 CNs: emit the loop that adds, for every BN of the
+    // group, its 10 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 10 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[9] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[9] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<10; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 11 CNs
+    fprintf(fd,  "// Process group with 11 CNs \n");
+
+    // For every BN of the group the generated loop adds its 11 CN messages and
+    // its channel LLR (saturating) and stores the result into llrRes.
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[10] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[10] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"           zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<11; k++)
+        {
+            fprintf(fd,"           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"           zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+    // =====================================================================
+    // Process group with 12 CNs: emit the loop that adds, for every BN of the
+    // group, its 12 CN messages and its channel LLR (saturating), into llrRes.
+
+    fprintf(fd,  "// Process group with 12 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[11] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[11] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"           zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<12; k++)
+        {
+            fprintf(fd,"           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"           zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+    // =====================================================================
+    // Process group with 13 CNs: emit the loop that adds, for every BN of the
+    // group, its 13 CN messages and its channel LLR (saturating), into llrRes.
+
+    fprintf(fd,  "// Process group with 13 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[12] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[12] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<13; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 14 CNs: emit the loop that adds, for every BN of the
+    // group, its 14 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 14 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[13] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[13] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<14; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+    // =====================================================================
+    // Process group with 15 CNs: emit the loop that adds, for every BN of the
+    // group, its 15 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 15 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[14] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[14] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<15; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 16 CNs: emit the loop that adds, for every BN of the
+    // group, its 16 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 16 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[15] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[15] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<16; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+    // =====================================================================
+    // Process group with 17 CNs: emit the loop that adds, for every BN of the
+    // group, its 17 CN messages and its channel LLR (saturating), into llrRes.
+    fprintf(fd,  "// Process group with 17 CNs \n");
+
+    // Nothing but the banner above is emitted when the group is empty
+
+    if (lut_numBnInBnGroups[16] > 0)
+    {
+        // Group is populated: advance to its entry in the start-address LUTs
+        idxBnGroup++;
+
+        // Number of 64-byte result vectors the generated loop produces (rounded up)
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[16] );
+
+        // Offset between consecutive CN sets, in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to the start of this group
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs: each iteration reads two __m256i per buffer and writes one __m512i
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First CN: sign-extend 2 x 32 int8 messages to int16
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Accumulate the remaining CNs with saturating 16-bit adds
+        for (k=1; k<17; k++)
+        {
+            fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+            fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+            fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+            fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8 (signed saturation)
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            // packs interleaves per 128-bit lane (R0/R1 = zmmRes0/1): qwords = [R0q0 R1q0 R0q1 R1q1 R0q2 R1q2 R0q3 R1q3]
+            // permutex_epi64 only shuffles within 256-bit halves; permutexvar restores [R0q0..R0q3 R1q0..R1q3]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutexvar_epi64(_mm512_set_epi64(7,5,3,1,6,4,2,0), zmm0);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 18 CNs
+
+fprintf(fd,  "// Process group with 18 CNs \n");
+
+ // Process group with 18 CNs
+
+    if (lut_numBnInBnGroups[17] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[17] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<18; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+   // =====================================================================
+    // Process group with 19 CNs
+
+fprintf(fd,  "// Process group with 19 CNs \n");
+
+ // Process group with 19 CNs
+
+    if (lut_numBnInBnGroups[18] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[18] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<19; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 20 CNs
+
+fprintf(fd,  "// Process group with 20 CNs \n");
+
+ // Process group with 20 CNs
+
+    if (lut_numBnInBnGroups[19] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[19] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<20; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+
+
+    // =====================================================================
+
+fprintf(fd,  "// Process group with 21 CNs \n");
+
+
+
+ // Process group with 21 CNs
+
+    if (lut_numBnInBnGroups[20] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[20] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"           zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<21; k++)
+        {
+        fprintf(fd,"           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"           zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+      // =====================================================================
+    // Process group with 22 CNs
+
+
+fprintf(fd,  "// Process group with 22 CNs \n");
+
+ // Process group with 22 CNs
+
+    if (lut_numBnInBnGroups[21] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[21] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"            for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"           zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<22; k++)
+        {
+        fprintf(fd,"           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"           zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+         fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+    // =====================================================================
+    // Process group with 23 CNs
+
+
+fprintf(fd,  "// Process group with <23 CNs \n");
+
+ // Process group with 23 CNs
+
+    if (lut_numBnInBnGroups[22] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[22] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<23; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+            }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+    // =====================================================================
+    // Process group with 24 CNs
+
+fprintf(fd,  "// Process group with 24 CNs \n");
+
+ // Process group with 24 CNs
+
+    if (lut_numBnInBnGroups[23] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[23] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<24; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 25 CNs
+
+fprintf(fd,  "// Process group with 25 CNs \n");
+
+ // Process group with 25 CNs
+
+    if (lut_numBnInBnGroups[24] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[24] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<25; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+
+   // =====================================================================
+    // Process group with 26 CNs
+
+fprintf(fd,  "// Process group with 26 CNs \n");
+
+ // Process group with 26 CNs
+
+    if (lut_numBnInBnGroups[25] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[25] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<26; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 27 CNs
+
+fprintf(fd,  "// Process group with 27 CNs \n");
+
+ // Process group with 27 CNs
+
+    if (lut_numBnInBnGroups[26] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[26] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<27; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 28 CNs
+
+fprintf(fd,  "// Process group with 28 CNs \n");
+
+ // Process group with 28 CNs
+
+    if (lut_numBnInBnGroups[27] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[27] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<28; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+   // =====================================================================
+    // Process group with 29 CNs
+
+fprintf(fd,  "// Process group with 29 CNs \n");
+
+ // Process group with 29 CNs
+
+    if (lut_numBnInBnGroups[28] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[28] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<29; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+
+   // =====================================================================
+    // Process group with 30 CNs
+
+fprintf(fd,  "// Process group with 30 CNs \n");
+
+ // Process group with 30 CNs
+
+    if (lut_numBnInBnGroups[29] > 0)
+    {
+        // If elements in group move to next address
+        idxBnGroup++;
+
+        // Number of groups of 64 BNs for parallel processing
+        fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[29] );;
+
+        // Set the offset to each CN within a group in units of 32 bytes (one __m256i)
+        cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>5;
+
+        // Set pointers to start of group 2
+        fprintf(fd,"    p_bnProcBuf     = (__m256i*) &bnProcBuf    [%d];\n",lut_startAddrBnGroups[idxBnGroup]);
+        fprintf(fd,"    p_llrProcBuf    = (__m256i*) &llrProcBuf   [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        fprintf(fd,"    p_llrRes        = (__m512i*) &llrRes       [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]);
+        // Loop over BNs
+        fprintf(fd,"        for (int i=0,j=0;i<M;i++,j+=2) {\n");
+            // First 16 LLRs of first CN
+        fprintf(fd,"       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);\n");
+
+            // Loop over CNs
+        for (k=1; k<30; k++)
+        {
+        fprintf(fd,"       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j]);\n", k*cnOffsetInGroup);
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[%d + j +1]);\n", k*cnOffsetInGroup);
+
+        fprintf(fd, "      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); \n");
+        }
+
+            // Add LLR from receiver input
+        fprintf(fd,"       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);\n");
+        fprintf(fd,"       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);\n");
+
+        fprintf(fd,"       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);\n");
+        fprintf(fd,"       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);\n");
+
+            // Pack results back to epi8
+        fprintf(fd,"       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);\n");
+            //zmm0     = [zmmRes1[255:256]zmmRes0[255:256]zmmRes1[127:0]zmmRes0[127:0]]
+            // p_llrRes = [zmmRes1[255:256]zmmRes1[127:0]zmmRes0[255:256]zmmRes0[127:0]]
+        fprintf(fd,"            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);\n");
+
+        fprintf(fd,"}\n");
+    }
+
+    fprintf(fd,"}\n");
+  fclose(fd);
+}//end of the function  nrLDPC_bnProcPc_BG2
+
+
+
+
+
+
+
+
+
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512
index f686a271f18d31892f83a65819d96e3439325fd9..dabfabe18fdbb098b9ea9a0996079e0fde890445 100755
Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 differ
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/main.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/main.c
index 3b2bc3ede498433e920ff2431f816ea7aa1a0d53..2bc870f17b10d31ab6e6ad4ac0bb761f5c0ef012 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/main.c
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/main.c
@@ -4,8 +4,8 @@
 #define NB_R  3
 void nrLDPC_bnProc_BG1_generator_AVX512(int);
 void nrLDPC_bnProc_BG2_generator_AVX512(int);
-//void nrLDPC_bnProcPc_BG1_generator_AVX2(int);
-//void nrLDPC_bnProcPc_BG2_generator_AVX2(int);
+void nrLDPC_bnProcPc_BG1_generator_AVX512(int);
+void nrLDPC_bnProcPc_BG2_generator_AVX512(int);
 
 int main()
 {
@@ -15,8 +15,8 @@ int main()
         nrLDPC_bnProc_BG1_generator_AVX512(R[i]);
 	nrLDPC_bnProc_BG2_generator_AVX512(R[i]);
         
-//	nrLDPC_bnProcPc_BG1_generator_AVX2(R[i]);
-//        nrLDPC_bnProcPc_BG2_generator_AVX2(R[i]);
+	nrLDPC_bnProcPc_BG1_generator_AVX512(R[i]);
+        nrLDPC_bnProcPc_BG2_generator_AVX512(R[i]);
 
 	}
 
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.c
new file mode 100644
index 0000000000000000000000000000000000000000..3f1c421cebdee768cc056933786826f33ddf9fe6
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.c
@@ -0,0 +1,440 @@
+#include <stdint.h>
+#include <immintrin.h>
+void nrLDPC_bnProc_BG2_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z  ) { // For each BN group and each CN in it: bnProcBufRes = llrRes - bnProcBuf (saturating int8), 32 bytes per step
+        __m256i* p_bnProcBuf; // 32-byte vector view of bnProcBuf at the current group's byte offset (input)
+        __m256i* p_bnProcBufRes; // 32-byte vector view of bnProcBufRes at the same byte offset (output)
+        __m256i* p_llrRes; // 32-byte vector view of llrRes at the current group's byte offset (input, only read here)
+        __m256i* p_res; // output cursor: first vector of the current CN's slice inside p_bnProcBufRes
+        uint32_t M, i; // M: number of vectors processed per CN in the current group; i: vector loop index
+// Process group with 2 CNs: one subtraction pass per CN (CN presumably = check node - confirm); slices are 12 vectors apart
+ M = (1*Z + 31)>>5; // ceil(1*Z / 32) vectors; rounds up, so the last vector may extend past 1*Z bytes
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [6912]; // group base at byte offset 6912; NOTE(review): __m256i dereferences presumably need 32-byte-aligned buffers - confirm
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [6912];
+            p_res = &p_bnProcBufRes[0]; // CN 0 output slice
+            p_llrRes = (__m256i*) &llrRes  [6912];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); // saturating signed 8-bit: llrRes - bnProcBuf
+}
+            p_res = &p_bnProcBufRes[12]; // CN 1 output slice: 12 vectors (384 bytes) after CN 0
+            p_llrRes = (__m256i*) &llrRes  [6912];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+// Process group with 3 CNs: no statements emitted for this group
+// Process group with 4 CNs: same pattern, 4 CN slices spaced 24 vectors apart
+       M = (2*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [7680];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [7680];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [7296];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [7296];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [7296];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [7296];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+// Process group with 5 CNs: same pattern, 5 CN slices spaced 12 vectors apart
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [10752];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [10752];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [8064];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [8064];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [8064];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [8064];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [8064];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+// Process group with 6 CNs: same pattern, 6 CN slices spaced 60 vectors apart
+       M = (5*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [12672];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [12672];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [8448];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [8448];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [8448];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[180];
+            p_llrRes = (__m256i*) &llrRes  [8448];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
+}
+            p_res = &p_bnProcBufRes[240];
+            p_llrRes = (__m256i*) &llrRes  [8448];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]);
+}
+            p_res = &p_bnProcBufRes[300];
+            p_llrRes = (__m256i*) &llrRes  [8448];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[300 + i]);
+}
+// Process group with 7 CNs: same pattern, 7 CN slices spaced 12 vectors apart
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [24192];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [24192];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [10368];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [10368];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [10368];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [10368];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [10368];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [10368];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [10368];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+// Process group with 8 CNs: same pattern, 8 CN slices spaced 12 vectors apart
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [26880];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [26880];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [10752];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [10752];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [10752];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [10752];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [10752];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [10752];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [10752];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [10752];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+// Process group with 9 CNs: no statements emitted for this group
+// Process group with 10 CNs: no statements emitted for this group
+// Process group with 11 CNs: no statements emitted for this group
+// Process group with 12 CNs: no statements emitted for this group
+// Process group with 13 CNs: same pattern, 13 CN slices spaced 12 vectors apart
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [29952];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [29952];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [11136];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+// Process group with 14 CNs: same pattern, 14 CN slices spaced 12 vectors apart
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [34944];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [34944];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+            p_res = &p_bnProcBufRes[156];
+            p_llrRes = (__m256i*) &llrRes  [11520];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
+}
+// Process group with 15 CNs: no statements emitted for this group
+// Process group with 16 CNs: same pattern, 16 CN slices spaced 12 vectors apart
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [40320];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [40320];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+            p_res = &p_bnProcBufRes[156];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
+}
+            p_res = &p_bnProcBufRes[168];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
+}
+            p_res = &p_bnProcBufRes[180];
+            p_llrRes = (__m256i*) &llrRes  [11904];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
+}
+// Process group with 17 CNs: no statements emitted for this group
+// Process group with 18 CNs: no statements emitted for this group
+// Process group with 19 CNs: no statements emitted for this group
+// Process group with 20 CNs: no statements emitted for this group
+// Process group with 21 CNs: no statements emitted for this group
+// Process group with 22 CNs: no statements emitted for this group
+// Process group with 23 CNs: no statements emitted for this group
+// Process group with 24 CNs: no statements emitted for this group
+// Process group with 25 CNs: no statements emitted for this group
+// Process group with 26 CNs: no statements emitted for this group
+// Process group with 27 CNs: no statements emitted for this group
+// Process group with 28 CNs: no statements emitted for this group
+// Process group with 29 CNs: no statements emitted for this group
+// Process group with 30 CNs: no statements emitted for this group
+}
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c
new file mode 100644
index 0000000000000000000000000000000000000000..f649102df7a8cde518242a39292f8180778425e4
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c
@@ -0,0 +1,799 @@
+#include <stdint.h>
+#include <immintrin.h>
+void nrLDPC_bnProc_BG2_R15_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z  ) {
+        __m256i* p_bnProcBuf; 
+        __m256i* p_bnProcBufRes; 
+        __m256i* p_llrRes; 
+        __m256i* p_res; 
+        uint32_t M, i; 
+// Process group with 2 CNs 
+// Process group with 3 CNs 
+// Process group with 4 CNs 
+// Process group with 5 CNs 
+       M = (2*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [14592];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [14592];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [14592];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [14592];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [14592];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [14592];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [14592];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+// Process group with 6 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [18432];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [18432];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [15360];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [15360];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [15360];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [15360];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [15360];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [15360];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+// Process group with 7 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [20736];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [20736];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [15744];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [15744];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [15744];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [15744];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [15744];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [15744];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [15744];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+// Process group with 8 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [23424];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [23424];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [16128];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [16128];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [16128];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [16128];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [16128];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [16128];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [16128];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [16128];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+// Process group with 9 CNs 
+       M = (2*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [26496];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [26496];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+            p_res = &p_bnProcBufRes[168];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
+}
+            p_res = &p_bnProcBufRes[192];
+            p_llrRes = (__m256i*) &llrRes  [16512];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
+}
+// Process group with 10 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [33408];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [33408];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [17280];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+// Process group with 11 CNs 
+// Process group with 12 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [37248];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [37248];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [17664];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+// Process group with 13 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [41856];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [41856];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [18048];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+// Process group with 14 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [46848];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [46848];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+            p_res = &p_bnProcBufRes[156];
+            p_llrRes = (__m256i*) &llrRes  [18432];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
+}
+// Process group with 15 CNs 
+// Process group with 16 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [52224];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [52224];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+            p_res = &p_bnProcBufRes[156];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
+}
+            p_res = &p_bnProcBufRes[168];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
+}
+            p_res = &p_bnProcBufRes[180];
+            p_llrRes = (__m256i*) &llrRes  [18816];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
+}
+// Process group with 17 CNs 
+// Process group with 18 CNs 
+// Process group with 19 CNs 
+// Process group with 20 CNs 
+// Process group with 21 CNs 
+// Process group with 22 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [58368];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [58368];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+            p_res = &p_bnProcBufRes[156];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
+}
+            p_res = &p_bnProcBufRes[168];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
+}
+            p_res = &p_bnProcBufRes[180];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
+}
+            p_res = &p_bnProcBufRes[192];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
+}
+            p_res = &p_bnProcBufRes[204];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]);
+}
+            p_res = &p_bnProcBufRes[216];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]);
+}
+            p_res = &p_bnProcBufRes[228];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]);
+}
+            p_res = &p_bnProcBufRes[240];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]);
+}
+            p_res = &p_bnProcBufRes[252];
+            p_llrRes = (__m256i*) &llrRes  [19200];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]);
+}
+// Process group with 23 CNs 
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [66816];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [66816];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[84];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+            p_res = &p_bnProcBufRes[132];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]);
+}
+            p_res = &p_bnProcBufRes[144];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]);
+}
+            p_res = &p_bnProcBufRes[156];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]);
+}
+            p_res = &p_bnProcBufRes[168];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]);
+}
+            p_res = &p_bnProcBufRes[180];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]);
+}
+            p_res = &p_bnProcBufRes[192];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]);
+}
+            p_res = &p_bnProcBufRes[204];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]);
+}
+            p_res = &p_bnProcBufRes[216];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]);
+}
+            p_res = &p_bnProcBufRes[228];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]);
+}
+            p_res = &p_bnProcBufRes[240];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]);
+}
+            p_res = &p_bnProcBufRes[252];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]);
+}
+            p_res = &p_bnProcBufRes[264];
+            p_llrRes = (__m256i*) &llrRes  [19584];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]);
+}
+// Process group with 24 CNs 
+// Process group with 25 CNs 
+// Process group with 26 CNs 
+// Process group with 27 CNs 
+// Process group with 28 CNs 
+// Process group with 29 CNs 
+// Process group with 30 CNs 
+}
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c
new file mode 100644
index 0000000000000000000000000000000000000000..59a1613099c8da476887c855153e5a4b2ebdd575
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c
@@ -0,0 +1,153 @@
+#include <stdint.h>
+#include <immintrin.h>
+void nrLDPC_bnProc_BG2_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes,  int8_t* llrRes, uint16_t Z  ) { // For every CN slice of every group: bnProcBufRes = llrRes - bnProcBuf (signed saturating int8); Z scales the slice length.
+        __m256i* p_bnProcBuf; // base of the current group inside bnProcBuf (input, indexed in 32-byte vectors)
+        __m256i* p_bnProcBufRes; // base of the same group inside bnProcBufRes (output)
+        __m256i* p_llrRes; // the group's slice of llrRes; the same slice is the minuend for every CN slice of the group
+        __m256i* p_res; // start of the output slice currently being written
+        uint32_t M, i; // M: number of 32-byte vectors per slice; i: vector index within a slice
+// Group with 2 CNs: 3*Z bytes per slice (presumably 3 BNs in this group - confirm against the generator); 2 slices spaced 36 vectors (1152 bytes) apart.
+ M = (3*Z + 31)>>5; // ceil(3*Z / 32): vectors needed to cover 3*Z int8 values
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [1152]; // NOTE(review): the __m256i dereferences below presumably need 32-byte-aligned buffers - confirm callers guarantee this
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [1152]; // output group base uses the same byte offset as the input group base
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [1152]; // re-assigned with the same offset before every slice of this group
+            for (i=0;i<M;i++) { // the last vector extends past 3*Z bytes whenever 3*Z is not a multiple of 32
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); // 32 lanes of int8: llrRes - bnProcBuf, clamped to [-128, 127]
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [1152];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+// Group with 3 CNs: 5*Z bytes per slice; 3 slices spaced 60 vectors (1920 bytes) apart; llrRes slice starts at byte 2304.
+       M = (5*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [3456];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [3456];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [2304];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [2304];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+            p_res = &p_bnProcBufRes[120];
+            p_llrRes = (__m256i*) &llrRes  [2304];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]);
+}
+// Group with 4 CNs: 3*Z bytes per slice; 4 slices spaced 36 vectors (1152 bytes) apart; llrRes slice starts at byte 4224.
+       M = (3*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [9216];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [9216];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [4224];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [4224];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [4224];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[108];
+            p_llrRes = (__m256i*) &llrRes  [4224];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]);
+}
+// Group with 5 CNs: 2*Z bytes per slice; 5 slices spaced 24 vectors (768 bytes) apart; llrRes slice starts at byte 5376.
+       M = (2*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [13824];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [13824];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [5376];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [5376];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [5376];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[72];
+            p_llrRes = (__m256i*) &llrRes  [5376];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]);
+}
+            p_res = &p_bnProcBufRes[96];
+            p_llrRes = (__m256i*) &llrRes  [5376];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]);
+}
+// Group with 6 CNs: 1*Z bytes per slice; 6 slices spaced 12 vectors (384 bytes) apart; llrRes slice starts at byte 6144.
+       M = (1*Z + 31)>>5;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [17664];
+   p_bnProcBufRes    = (__m256i*) &bnProcBufRes   [17664];
+            p_res = &p_bnProcBufRes[0];
+            p_llrRes = (__m256i*) &llrRes  [6144];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]);
+}
+            p_res = &p_bnProcBufRes[12];
+            p_llrRes = (__m256i*) &llrRes  [6144];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]);
+}
+            p_res = &p_bnProcBufRes[24];
+            p_llrRes = (__m256i*) &llrRes  [6144];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]);
+}
+            p_res = &p_bnProcBufRes[36];
+            p_llrRes = (__m256i*) &llrRes  [6144];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]);
+}
+            p_res = &p_bnProcBufRes[48];
+            p_llrRes = (__m256i*) &llrRes  [6144];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]);
+}
+            p_res = &p_bnProcBufRes[60];
+            p_llrRes = (__m256i*) &llrRes  [6144];
+            for (i=0;i<M;i++) {
+            p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]);
+}
+// Group with 7 CNs: no code emitted (presumably this configuration has no BNs with 7 CNs - confirm against the generator).
+// Group with 8 CNs: no code emitted.
+// Group with 9 CNs: no code emitted.
+// Group with 10 CNs: no code emitted.
+// Group with 11 CNs: no code emitted.
+// Group with 12 CNs: no code emitted.
+// Group with 13 CNs: no code emitted.
+// Group with 14 CNs: no code emitted.
+// Group with 15 CNs: no code emitted.
+// Group with 16 CNs: no code emitted.
+// Group with 17 CNs: no code emitted.
+// Group with 18 CNs: no code emitted.
+// Group with 19 CNs: no code emitted.
+// Group with 20 CNs: no code emitted.
+// Group with 21 CNs: no code emitted.
+// Group with 22 CNs: no code emitted.
+// Group with 23 CNs: no code emitted.
+// Group with 24 CNs: no code emitted.
+// Group with 25 CNs: no code emitted.
+// Group with 26 CNs: no code emitted.
+// Group with 27 CNs: no code emitted.
+// Group with 28 CNs: no code emitted.
+// Group with 29 CNs: no code emitted.
+// Group with 30 CNs: no code emitted.
+}
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R13_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R13_AVX2.h
index a4508d2d7a3db278a3700ccba3ae4c0c0fad8168..b10a842171b778f7d4323b6e1ad35ac6deb01b95 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R13_AVX2.h
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R13_AVX2.h
@@ -5,20 +5,6 @@ static inline void nrLDPC_bnProcPc_BG1_R13_AVX2(int8_t* bnProcBuf,int8_t* llrRes
         __m256i* p_llrRes; 
          uint32_t M ;
 // Process group with 1 CNs 
- M = (42*Z + 31)>>5;
-    p_bnProcBuf     = (__m128i*) &bnProcBuf    [0];
-    p_llrProcBuf    = (__m128i*) &llrProcBuf   [0];
-    p_llrRes        = (__m256i*) &llrRes       [0];
-            for (int i=0,j=0;i<M;i++,j+=2) {
-            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
-            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
-            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);
-}
 // Process group with 2 CNs 
 // Process group with 3 CNs 
 // Process group with 4 CNs 
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R23_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R23_AVX2.h
index 47fd449fe44011f19cfdd68225d5c79fb62e37d5..8647e8cde6a758fa7e38dbbda53184d7559cfc38 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R23_AVX2.h
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R23_AVX2.h
@@ -5,20 +5,6 @@ static inline void nrLDPC_bnProcPc_BG1_R23_AVX2(int8_t* bnProcBuf,int8_t* llrRes
         __m256i* p_llrRes; 
          uint32_t M ;
 // Process group with 1 CNs 
- M = (9*Z + 31)>>5;
-    p_bnProcBuf     = (__m128i*) &bnProcBuf    [0];
-    p_llrProcBuf    = (__m128i*) &llrProcBuf   [0];
-    p_llrRes        = (__m256i*) &llrRes       [0];
-            for (int i=0,j=0;i<M;i++,j+=2) {
-            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
-            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
-            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);
-}
 // Process group with 2 CNs 
  M = (1*Z + 31)>>5;
     p_bnProcBuf     = (__m128i*) &bnProcBuf    [3456];
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R89_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R89_AVX2.h
index 8c7fa43f5d51237654926b1f51cc3b66b907533c..aee271db84d036d34c317d326ede442d84b51056 100644
--- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R89_AVX2.h
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG1_R89_AVX2.h
@@ -5,20 +5,6 @@ static inline void nrLDPC_bnProcPc_BG1_R89_AVX2(int8_t* bnProcBuf,int8_t* llrRes
         __m256i* p_llrRes; 
          uint32_t M ;
 // Process group with 1 CNs 
- M = (1*Z + 31)>>5;
-    p_bnProcBuf     = (__m128i*) &bnProcBuf    [0];
-    p_llrProcBuf    = (__m128i*) &llrProcBuf   [0];
-    p_llrRes        = (__m256i*) &llrRes       [0];
-            for (int i=0,j=0;i<M;i++,j+=2) {
-            ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]);
-            ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[j + 1]);
-            ymm0    = _mm256_cvtepi8_epi16(p_llrProcBuf[j]);
-            ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0);
-            ymm1    = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
-            ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1);
-            ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1);
-            p_llrRes[i] = _mm256_permute4x64_epi64(ymm0, 0xD8);
-}
 // Process group with 2 CNs 
  M = (3*Z + 31)>>5;
     p_bnProcBuf     = (__m128i*) &bnProcBuf    [384];
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R13_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R13_AVX512.h
new file mode 100644
index 0000000000000000000000000000000000000000..1db81b7a061e6c15bcbe6d842f7765a708aa16b3
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R13_AVX512.h
@@ -0,0 +1,743 @@
+static inline void nrLDPC_bnProcPc_BG1_R13_AVX512(int8_t* bnProcBuf,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z ) {
+   __m512i zmm0, zmm1, zmmRes0, zmmRes1;  
+        __m256i* p_bnProcBuf; 
+        __m256i* p_llrProcBuf;
+        __m512i* p_llrRes; 
+         uint32_t M ;
+// Process group with 1 CNs 
+ M = (42*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [0];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [0];
+    p_llrRes        = (__m512i*) &llrRes       [0];
+            for (int i=0,j=0;i<M;i++,j+=2) {
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 2 CNs 
+// Process group with 3 CNs 
+// Process group with 4 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [16128];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [16128];
+    p_llrRes        = (__m512i*) &llrRes       [16128];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 5 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [17664];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [16512];
+    p_llrRes        = (__m512i*) &llrRes       [16512];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 6 CNs 
+ M = (2*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [19584];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [16896];
+    p_llrRes        = (__m512i*) &llrRes       [16896];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 7 CNs 
+ M = (4*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [24192];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [17664];
+    p_llrRes        = (__m512i*) &llrRes       [17664];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 8 CNs 
+ M = (3*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [34944];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [19200];
+    p_llrRes        = (__m512i*) &llrRes       [19200];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 9 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [44160];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [20352];
+    p_llrRes        = (__m512i*) &llrRes       [20352];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 10 CNs 
+ M = (4*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [47616];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [20736];
+    p_llrRes        = (__m512i*) &llrRes       [20736];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[336 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[336 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[384 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[384 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[432 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[432 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 11 CNs 
+ M = (3*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [62976];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [22272];
+    p_llrRes        = (__m512i*) &llrRes       [22272];
+            for (int i=0,j=0;i<M;i++,j+=2) {
+            zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+            zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[324 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[324 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[360 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[360 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 12 CNs 
+ M = (4*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [75648];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [23424];
+    p_llrRes        = (__m512i*) &llrRes       [23424];
+            for (int i=0,j=0;i<M;i++,j+=2) {
+            zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+            zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[336 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[336 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[384 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[384 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[432 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[432 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[480 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[480 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[528 + j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[528 + j +1]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+            zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+            zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+            zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+            zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+            zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 13 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [94080];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [24960];
+    p_llrRes        = (__m512i*) &llrRes       [24960];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 14 CNs 
+// Process group with 15 CNs 
+// Process group with 16 CNs 
+// Process group with 17 CNs 
+// Process group with 18 CNs 
+// Process group with 19 CNs 
+// Process group with 20 CNs 
+// Process group with 21 CNs 
+// Process group with 22 CNs 
+// Process group with 23 CNs 
+// Process group with 24 CNs 
+// Process group with 25 CNs 
+// Process group with 26 CNs 
+// Process group with 27 CNs 
+// Process group with 28 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [99072];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [25344];
+    p_llrRes        = (__m512i*) &llrRes       [25344];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[204 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[204 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[228 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[228 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[264 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[264 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[276 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[276 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[300 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[300 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[312 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[312 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[324 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[324 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 29 CNs 
+// Process group with 30 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [109824];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [25728];
+    p_llrRes        = (__m512i*) &llrRes       [25728];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[204 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[204 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[228 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[228 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[264 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[264 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[276 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[276 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[300 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[300 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[312 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[312 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[324 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[324 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[336 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[336 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[348 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[348 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1); 
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+}
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R23_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R23_AVX512.h
new file mode 100644
index 0000000000000000000000000000000000000000..902da267d9f611fe0a5a830a8047fe34d401a243
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R23_AVX512.h
@@ -0,0 +1,293 @@
+// Per BN-degree group: llrRes = sat_int8(sum of the group's CN messages in bnProcBuf + llrProcBuf), 64 output bytes per iteration.
+// NOTE(review): _mm512_permutex_epi64 only shuffles within 256-bit halves; confirm output byte order after the per-128-bit-lane pack.
+static inline void nrLDPC_bnProcPc_BG1_R23_AVX512(int8_t* bnProcBuf,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z ) {
+    __m512i zmm0, zmm1, zmmRes0, zmmRes1;      // zmm0/1: widened operands, zmmRes0/1: 16-bit running sums
+    __m256i *p_bnProcBuf, *p_llrProcBuf;       // inputs are read 32 int8 values at a time
+    __m512i* p_llrRes;                         // output is written 64 int8 values at a time
+// Process group with 1 CNs: one message per entry, added to the LLR
+    uint32_t M = (9*Z + 63) >> 6;              // number of 64-byte output chunks, rounded up
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[0];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[0];
+    p_llrRes     = (__m512i*) &llrRes[0];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);       // seed the accumulators (were left uninitialized)
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);       // back to int8 with signed saturation
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 2 CNs: two messages per entry, 12 __m256i apart
+    M = (1*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[3456];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[3456];
+    p_llrRes     = (__m512i*) &llrRes[3456];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);       // seed the accumulators (were left uninitialized)
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 3 CNs: three messages per entry, 60 __m256i apart
+    M = (5*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[4224];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[3840];
+    p_llrRes     = (__m512i*) &llrRes[3840];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 4 CNs: four messages per entry, 36 __m256i apart
+    M = (3*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[9984];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[5760];
+    p_llrRes     = (__m512i*) &llrRes[5760];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 5 CNs: five messages per entry, 84 __m256i apart
+    M = (7*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[14592];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[6912];
+    p_llrRes     = (__m512i*) &llrRes[6912];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[336 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[336 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 6 CNs: six messages per entry, 96 __m256i apart
+    M = (8*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[28032];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[9600];
+    p_llrRes     = (__m512i*) &llrRes[9600];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[288 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[384 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[384 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[480 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[480 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 7 CNs
+// Process group with 8 CNs
+// Process group with 9 CNs
+// Process group with 10 CNs
+// Process group with 11 CNs: eleven messages per entry, 12 __m256i apart
+    M = (1*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[46464];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[12672];
+    p_llrRes     = (__m512i*) &llrRes[12672];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 12 CNs: twelve messages per entry, 12 __m256i apart
+    M = (1*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[50688];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[13056];
+    p_llrRes     = (__m512i*) &llrRes[13056];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 13 CNs
+// Process group with 14 CNs
+// Process group with 15 CNs
+// Process group with 16 CNs
+// Process group with 17 CNs
+// Process group with 18 CNs
+// Process group with 19 CNs
+// Process group with 20 CNs
+// Process group with 21 CNs
+// Process group with 22 CNs
+// Process group with 23 CNs
+// Process group with 24 CNs
+// Process group with 25 CNs
+// Process group with 26 CNs
+// Process group with 27 CNs
+// Process group with 28 CNs
+// Process group with 29 CNs
+// Process group with 30 CNs
+}
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R89_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R89_AVX512.h
new file mode 100644
index 0000000000000000000000000000000000000000..4731c417df7a969b3f533550589b3314e0d04400
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG1_R89_AVX512.h
@@ -0,0 +1,147 @@
+// Per BN-degree group: llrRes = sat_int8(sum of the group's CN messages in bnProcBuf + llrProcBuf), 64 output bytes per iteration.
+// NOTE(review): _mm512_permutex_epi64 only shuffles within 256-bit halves; confirm output byte order after the per-128-bit-lane pack.
+static inline void nrLDPC_bnProcPc_BG1_R89_AVX512(int8_t* bnProcBuf,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z ) {
+    __m512i zmm0, zmm1, zmmRes0, zmmRes1;      // zmm0/1: widened operands, zmmRes0/1: 16-bit running sums
+    __m256i *p_bnProcBuf, *p_llrProcBuf;       // inputs are read 32 int8 values at a time
+    __m512i* p_llrRes;                         // output is written 64 int8 values at a time
+// Process group with 1 CNs: one message per entry, added to the LLR
+    uint32_t M = (1*Z + 63) >> 6;              // number of 64-byte output chunks, rounded up
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[0];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[0];
+    p_llrRes     = (__m512i*) &llrRes[0];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);       // seed the accumulators (were left uninitialized)
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);       // back to int8 with signed saturation
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 2 CNs: two messages per entry, 36 __m256i apart
+    M = (3*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[384];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[384];
+    p_llrRes     = (__m512i*) &llrRes[384];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);       // seed the accumulators (were left uninitialized)
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 3 CNs: three messages per entry, 252 __m256i apart
+    M = (21*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[2688];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[1536];
+    p_llrRes     = (__m512i*) &llrRes[1536];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[504 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[504 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 4 CNs: four messages per entry, 12 __m256i apart
+    M = (1*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[26880];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[9600];
+    p_llrRes     = (__m512i*) &llrRes[9600];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 5 CNs: five messages per entry, 12 __m256i apart
+    M = (1*Z + 63) >> 6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf[28416];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[9984];
+    p_llrRes     = (__m512i*) &llrRes[9984];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+// Process group with 6 CNs
+// Process group with 7 CNs
+// Process group with 8 CNs
+// Process group with 9 CNs
+// Process group with 10 CNs
+// Process group with 11 CNs
+// Process group with 12 CNs
+// Process group with 13 CNs
+// Process group with 14 CNs
+// Process group with 15 CNs
+// Process group with 16 CNs
+// Process group with 17 CNs
+// Process group with 18 CNs
+// Process group with 19 CNs
+// Process group with 20 CNs
+// Process group with 21 CNs
+// Process group with 22 CNs
+// Process group with 23 CNs
+// Process group with 24 CNs
+// Process group with 25 CNs
+// Process group with 26 CNs
+// Process group with 27 CNs
+// Process group with 28 CNs
+// Process group with 29 CNs
+// Process group with 30 CNs
+}
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R13_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R13_AVX512.h
new file mode 100644
index 0000000000000000000000000000000000000000..67fc3eea3d3edb3a7aabbe62e6e42f06340c045c
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R13_AVX512.h
@@ -0,0 +1,441 @@
+static inline void nrLDPC_bnProcPc_BG2_R13_AVX512(int8_t* bnProcBuf, int8_t* llrRes, int8_t* llrProcBuf, uint16_t Z) {
+    // Per group: widen the int8 rows of bnProcBuf and the matching llrProcBuf row to int16, add them with saturation, pack back to int8 (saturating) and store to llrRes; Z scales how many 64-byte vectors each group spans.
+    // NOTE(review): _mm512_packs_epi16 packs per 128-bit lane and _mm512_permutex_epi64(.., 0xD8) reorders qwords only within each 256-bit half, so the stored bytes may not be in linear order - confirm against the consumer of llrRes.
+    __m512i zmm0, zmm1, zmmRes0, zmmRes1; // zmm0/zmm1: widened inputs, zmmRes0/zmmRes1: int16 running sums (low/high 32 values)
+    __m256i *p_bnProcBuf, *p_llrProcBuf;  // read as __m256i (32 int8 values each); buffers are assumed 32-byte aligned
+    __m512i *p_llrRes;                    // written as __m512i (64 int8 values each); llrRes is assumed 64-byte aligned
+    // Process group with 1 CNs: a single bnProcBuf row plus the llrProcBuf row
+    uint32_t M = (18*Z + 63)>>6; // 18*Z int8 values, rounded up to whole 64-byte vectors
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [0];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[0];
+    p_llrRes     = (__m512i*) &llrRes    [0];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmm0, zmm1); // low 32 values: bnProcBuf + llrProcBuf
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmm0, zmm1); // high 32 values: bnProcBuf + llrProcBuf
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1); // saturate the int16 sums back to int8
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 2 CNs: rows are 12 __m256i elements apart
+    M = (1*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [6912];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[6912];
+    p_llrRes     = (__m512i*) &llrRes    [6912];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);     // seed the accumulators with the first row
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 3 CNs
+    // Process group with 4 CNs
+    M = (2*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [7680];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[7296];
+    p_llrRes     = (__m512i*) &llrRes    [7296];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 5 CNs
+    M = (1*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [10752];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[8064];
+    p_llrRes     = (__m512i*) &llrRes    [8064];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 6 CNs: rows are 60 __m256i elements apart
+    M = (5*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [12672];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[8448];
+    p_llrRes     = (__m512i*) &llrRes    [8448];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[300 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[300 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 7 CNs
+    M = (1*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [24192];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[10368];
+    p_llrRes     = (__m512i*) &llrRes    [10368];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 8 CNs
+    M = (1*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [26880];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[10752];
+    p_llrRes     = (__m512i*) &llrRes    [10752];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 9 CNs
+    // Process group with 10 CNs
+    // Process group with 11 CNs
+    // Process group with 12 CNs
+    // Process group with 13 CNs
+    M = (1*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [29952];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[11136];
+    p_llrRes     = (__m512i*) &llrRes    [11136];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 14 CNs
+    M = (1*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [34944];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[11520];
+    p_llrRes     = (__m512i*) &llrRes    [11520];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 15 CNs
+    // Process group with 16 CNs
+    M = (1*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [40320];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[11904];
+    p_llrRes     = (__m512i*) &llrRes    [11904];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+    }
+    // Process group with 17 CNs
+    // Process group with 18 CNs
+    // Process group with 19 CNs
+    // Process group with 20 CNs
+    // Process group with 21 CNs
+    // Process group with 22 CNs
+    // Process group with 23 CNs
+    // Process group with 24 CNs
+    // Process group with 25 CNs
+    // Process group with 26 CNs
+    // Process group with 27 CNs
+    // Process group with 28 CNs
+    // Process group with 29 CNs
+    // Process group with 30 CNs
+}
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R15_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R15_AVX512.h
new file mode 100644
index 0000000000000000000000000000000000000000..b6fce03ca4c4d4fca379c89b0f3eec94ca9d9658
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R15_AVX512.h
@@ -0,0 +1,751 @@
+static inline void nrLDPC_bnProcPc_BG2_R15_AVX512(int8_t* bnProcBuf,int8_t* llrRes ,  int8_t* llrProcBuf, uint16_t Z ) {
+   __m512i zmm0,zmm1,zmmRes0,zmmRes1;  
+        __m256i* p_bnProcBuf; 
+        __m256i* p_llrProcBuf;
+        __m512i* p_llrRes; 
+         uint32_t M ;
+// Process group with 1 CNs 
+ M = (38*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [0];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [0];
+    p_llrRes        = (__m512i*) &llrRes       [0];
+            for (int i=0,j=0;i<M;i++,j+=2) {
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+           zmm1 = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+           zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[j+1]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 2 CNs 
+// Process group with 3 CNs 
+// Process group with 4 CNs 
+// Process group with 5 CNs 
+ M = (2*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [14592];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [14592];
+    p_llrRes        = (__m512i*) &llrRes       [14592];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 6 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [18432];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [15360];
+    p_llrRes        = (__m512i*) &llrRes       [15360];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 7 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [20736];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [15744];
+    p_llrRes        = (__m512i*) &llrRes       [15744];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 8 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [23424];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [16128];
+    p_llrRes        = (__m512i*) &llrRes       [16128];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 9 CNs 
+ M = (2*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [26496];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [16512];
+    p_llrRes        = (__m512i*) &llrRes       [16512];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 10 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [33408];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [17280];
+    p_llrRes        = (__m512i*) &llrRes       [17280];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 11 CNs 
+// Process group with 12 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [37248];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [17664];
+    p_llrRes        = (__m512i*) &llrRes       [17664];
+            for (int i=0,j=0;i<M;i++,j+=2) {
+           zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+           zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 13 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [41856];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [18048];
+    p_llrRes        = (__m512i*) &llrRes       [18048];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 14 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [46848];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [18432];
+    p_llrRes        = (__m512i*) &llrRes       [18432];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 15 CNs 
+// Process group with 16 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [52224];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [18816];
+    p_llrRes        = (__m512i*) &llrRes       [18816];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j +1]);
+      zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 17 CNs 
+// Process group with 18 CNs 
+// Process group with 19 CNs 
+// Process group with 20 CNs 
+// Process group with 21 CNs 
+// Process group with 22 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [58368];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [19200];
+    p_llrRes        = (__m512i*) &llrRes       [19200];
+            for (int i=0,j=0;i<M;i++,j+=2) {
+           zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+           zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[204 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[204 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[228 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[228 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j +1]);
+          zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+           zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+           zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+           zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+           zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+           zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with <23 CNs 
+ M = (1*Z + 63)>>6;
+    p_bnProcBuf     = (__m256i*) &bnProcBuf    [66816];
+    p_llrProcBuf    = (__m256i*) &llrProcBuf   [19584];
+    p_llrRes        = (__m512i*) &llrRes       [19584];
+        for (int i=0,j=0;i<M;i++,j+=2) {
+       zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf [j]);
+       zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf [j +1]);
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[84 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[132 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[144 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[156 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[168 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[180 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[192 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[204 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[204 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[216 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[228 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[228 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[240 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[252 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0 = _mm512_cvtepi8_epi16(p_bnProcBuf[264 + j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1 = _mm512_cvtepi8_epi16(p_bnProcBuf[264 + j +1]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1); 
+       zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+       zmmRes0 = _mm512_adds_epi16(zmmRes0,zmm0);
+       zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j +1 ]);
+       zmmRes1 = _mm512_adds_epi16(zmmRes1,zmm1);
+       zmm0 = _mm512_packs_epi16(zmmRes0,zmmRes1);
+            p_llrRes[i] = _mm512_permutex_epi64(zmm0, 0xD8);
+}
+// Process group with 24 CNs 
+// Process group with 25 CNs 
+// Process group with 26 CNs 
+// Process group with 27 CNs 
+// Process group with 28 CNs 
+// Process group with 29 CNs 
+// Process group with 30 CNs 
+}
diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R23_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R23_AVX512.h
new file mode 100644
index 0000000000000000000000000000000000000000..6330988dd3a1f306137541710367bd604541651b
--- /dev/null
+++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bn_avx512/nrLDPC_bnProcPc_BG2_R23_AVX512.h
@@ -0,0 +1,181 @@
+/**
+ * Accumulates the LLR of every bit node for the BG2 R23 configuration:
+ * llrRes = sat8(llrProcBuf + sum of the bit node's blocks in bnProcBuf).
+ *
+ * Bit nodes are processed in groups that share the same number of CNs. Group
+ * offsets and CN strides are hard-coded for 384 bytes per bit node; M is the
+ * number of 64-byte chunks processed per group for the given Z. Sums are
+ * formed in int16 with saturation and then packed back to int8.
+ *
+ * All buffers are accessed through vector pointers, so they must be aligned
+ * (32 bytes for the inputs, 64 bytes for llrRes).
+ * NOTE(review): this header looks generated (ldpc_gen_files); the generator
+ * presumably needs the same corrections - confirm.
+ *
+ * @param bnProcBuf   int8 input blocks, one block per connected CN (read)
+ * @param llrRes      int8 output, written in 64-byte chunks
+ * @param llrProcBuf  int8 input LLRs (read)
+ * @param Z           size parameter, presumably the lifting size (<= 384)
+ */
+static inline void nrLDPC_bnProcPc_BG2_R23_AVX512(int8_t* bnProcBuf, int8_t* llrRes, int8_t* llrProcBuf, uint16_t Z) {
+    __m512i zmm0, zmm1, zmmRes0, zmmRes1;
+    __m256i* p_bnProcBuf;
+    __m256i* p_llrProcBuf;
+    __m512i* p_llrRes;
+    uint32_t M;
+    // _mm512_packs_epi16 packs per 128-bit lane: qwords come out as
+    // [a0 b0 a1 b1 a2 b2 a3 b3]; this index restores [a0 a1 a2 a3 b0 b1 b2 b3].
+    const __m512i permIdx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0);
+    // Process group with 1 CNs
+    M = (3*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [0];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[0];
+    p_llrRes     = (__m512i*) &llrRes    [0];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);      // accumulators start from
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);  // the first CN block
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutexvar_epi64(permIdx, zmm0);
+    }
+    // Process group with 2 CNs
+    M = (3*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [1152];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[1152];
+    p_llrRes     = (__m512i*) &llrRes    [1152];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);      // accumulators start from
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);  // the first CN block
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutexvar_epi64(permIdx, zmm0);
+    }
+    // Process group with 3 CNs
+    M = (5*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [3456];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[2304];
+    p_llrRes     = (__m512i*) &llrRes    [2304];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[120 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutexvar_epi64(permIdx, zmm0);
+    }
+    // Process group with 4 CNs
+    M = (3*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [9216];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[4224];
+    p_llrRes     = (__m512i*) &llrRes    [4224];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[108 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutexvar_epi64(permIdx, zmm0);
+    }
+    // Process group with 5 CNs
+    M = (2*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [13824];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[5376];
+    p_llrRes     = (__m512i*) &llrRes    [5376];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[72 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[96 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutexvar_epi64(permIdx, zmm0);
+    }
+    // Process group with 6 CNs
+    M = (1*Z + 63)>>6;
+    p_bnProcBuf  = (__m256i*) &bnProcBuf [17664];
+    p_llrProcBuf = (__m256i*) &llrProcBuf[6144];
+    p_llrRes     = (__m512i*) &llrRes    [6144];
+    for (uint32_t i = 0, j = 0; i < M; i++, j += 2) {
+        zmmRes0 = _mm512_cvtepi8_epi16(p_bnProcBuf[j]);
+        zmmRes1 = _mm512_cvtepi8_epi16(p_bnProcBuf[j + 1]);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[12 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[24 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[36 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[48 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_bnProcBuf[60 + j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0    = _mm512_cvtepi8_epi16(p_llrProcBuf[j]);
+        zmmRes0 = _mm512_adds_epi16(zmmRes0, zmm0);
+        zmm1    = _mm512_cvtepi8_epi16(p_llrProcBuf[j + 1]);
+        zmmRes1 = _mm512_adds_epi16(zmmRes1, zmm1);
+        zmm0 = _mm512_packs_epi16(zmmRes0, zmmRes1);
+        p_llrRes[i] = _mm512_permutexvar_epi64(permIdx, zmm0);
+    }
+    // Groups with 7 to 30 CNs: no code is emitted for them in this
+    // configuration.
+}