diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c index 6061c397fba95da4119895d7971c79bc78696069..ffcad00af4d2b8eddd7361b92fc6674af3d97e14 100644 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_decoder.c @@ -1,3 +1,4 @@ + /* * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more * contributor license agreements. See the NOTICE file distributed with @@ -48,7 +49,6 @@ #include "nrLDPC_tools/ldpc_gen_files/cnProc_avx512/nrLDPC_cnProc_BG2_R13_AVX512.h" #include "nrLDPC_tools/ldpc_gen_files/cnProc_avx512/nrLDPC_cnProc_BG2_R23_AVX512.h" - #else /*---------------------------------------------------------------------- @@ -81,7 +81,18 @@ #include "nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R23_AVX2.h" //bnProc---------------------------------------------------------------- + +#ifdef __AVX512BW__ //BG1------------------------------------------------------------------- +#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h" +#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h" +#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h" +//BG2 -------------------------------------------------------------------- +#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h" +#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h" +#include "nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R23_AVX512.h" + +#else #include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R13_AVX2.h" #include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R23_AVX2.h" #include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG1_R89_AVX2.h" @@ -90,6 +101,7 @@ #include "nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.h" #include 
"nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.h" +#endif @@ -237,7 +249,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP } } - } + } else { switch (R) @@ -330,7 +342,7 @@ static inline uint32_t nrLDPC_decoder_core(int8_t* p_llr, int8_t* p_out, t_nrLDP break; } } - } + } else { switch (R) @@ -380,41 +392,65 @@ if (BG==1) { case 13: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 23: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 89: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R89_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R89_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } } - } + } else { switch (R) { case 15: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R15_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R15_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 13: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 23: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } @@ -501,7 +537,7 @@ if (BG==1) } } - } + } else { switch (R) 
@@ -592,7 +628,7 @@ if (BG==1) break; } } - } + } else { switch (R) @@ -634,47 +670,72 @@ if (BG==1) #endif // nrLDPC_bnProc(p_lut, p_procBuf, Z); + if (BG==1) { switch (R) { case 13: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 23: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 89: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R89_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R89_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } } - } + } else { switch (R) { case 15: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R15_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R15_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 13: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 23: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } @@ -684,7 +745,6 @@ if (BG==1) - #ifdef NR_LDPC_PROFILER_DETAIL stop_meas(&p_profiler->bnProc); #endif @@ -777,7 +837,7 @@ if (BG==1) } } - } + } else { switch (R) @@ -868,7 +928,7 @@ if (BG==1) break; } } - } + } else { switch (R) @@ -915,47 +975,72 @@ if (BG==1) 
//nrLDPC_bnProc(p_lut, p_procBuf, Z); + if (BG==1) { switch (R) { case 13: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 23: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 89: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG1_R89_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG1_R89_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } } - } + } else { switch (R) { case 15: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R15_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R15_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 13: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R13_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R13_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } case 23: { + #ifdef __AVX512BW__ + nrLDPC_bnProc_BG2_R23_AVX512(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #else nrLDPC_bnProc_BG2_R23_AVX2(p_procBuf->bnProcBuf, p_procBuf->bnProcBufRes,p_procBuf->llrRes, Z); + #endif break; } @@ -964,7 +1049,6 @@ if (BG==1) } - #ifdef NR_LDPC_PROFILER_DETAIL stop_meas(&p_profiler->bnProc); #endif @@ -1052,5 +1136,3 @@ if (BG==1) - - diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz deleted file mode 100644 index 
8a0f960339e7fad417d991a6bfa540d9992487ef..0000000000000000000000000000000000000000 Binary files a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc/sauvegarde.tar.gz and /dev/null differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/Makefile b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9ca92c1ddaeb5a7c5248cd9ca6f5954806155933 --- /dev/null +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/Makefile @@ -0,0 +1,27 @@ +CC=gcc +CFLAGS=-W -Wall -mavx2 +LDFLAGS= +EXEC=bnProc_gen_avx512 +SRC= $(wildcard *.c) +OBJ= $(SRC:.c=.o) + +all: $(EXEC) + +bnProc_gen_avx512: $(OBJ) + @$(CC) -o $@ $^ $(LDFLAGS) -O2 + + + +%.o: %.c + @$(CC) -o $@ -c $< $(CFLAGS) -I ${OPENAIR_HOME}/openair1 -g -std=c99 + +.PHONY: all clean mrproper zip + +clean: + @rm -rf *.o + +mrproper: clean + @rm -rf $(EXEC) + +zip: + @tar -zcvf sauvegarde.tar.gz main.c bnProc_gen_BG1_avx512.c bnProc_gen_BG2_avx512.c Makefile diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c new file mode 100644 index 0000000000000000000000000000000000000000..07c9a4a4c347c607a456cf46e75f596dd1c1d674 --- /dev/null +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG1_avx512.c @@ -0,0 +1,1185 @@ + + + +#include <stdint.h> +#include <immintrin.h> +#include "../../nrLDPCdecoder_defs.h" +#include "../../nrLDPC_types.h" + + + +void nrLDPC_bnProc_BG1_generator_AVX512(int R) +{ + const char *ratestr[3]={"13","23","89"}; + + if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();} + + + // system("mkdir -p ../ldpc_gen_files"); + + char fname[128]; /* output path is 62 chars + NUL; the former 50-byte buffer overflowed */ + snprintf(fname,sizeof(fname),"../ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R%s_AVX512.h",ratestr[R]); + FILE *fd=fopen(fname,"w"); + if
(fd == NULL) {printf("Cannot create \n");abort();} + + //fprintf(fd,"#include <stdint.h>\n"); + //fprintf(fd,"#include <immintrin.h>\n"); + + + fprintf(fd,"static inline void nrLDPC_bnProc_BG1_R%s_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {\n", ratestr[R]); + + const uint8_t* lut_numBnInBnGroups; + const uint32_t* lut_startAddrBnGroups; + const uint16_t* lut_startAddrBnGroupsLlr; + if (R==0) { + + + lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R13; + lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R13; + lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R13; + + } + else if (R==1){ + + lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R23; + lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R23; + lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R23; + } + else if (R==2) { + + lut_numBnInBnGroups = lut_numBnInBnGroups_BG1_R89; + lut_startAddrBnGroups = lut_startAddrBnGroups_BG1_R89; + lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG1_R89; + } + else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();} + + + //uint32_t M; + //uint32_t M32rem; + // uint32_t i; + uint32_t k; + // Offset to each bit within a group in terms of 32 Byte + uint32_t cnOffsetInGroup; + uint8_t idxBnGroup = 0; + + + + fprintf(fd," __m512i* p_bnProcBuf; \n"); + fprintf(fd," __m512i* p_bnProcBufRes; \n"); + fprintf(fd," __m512i* p_llrRes; \n"); + fprintf(fd," __m512i* p_res; \n"); + fprintf(fd," uint32_t M, i; \n"); + + + +// ===================================================================== + // Process group with 1 CN + // Already done in bnProcBufPc + + // ===================================================================== + + + // ===================================================================== + +fprintf(fd, "// Process group with 2 CNs \n"); + + if (lut_numBnInBnGroups[1] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs or parallel processing + fprintf(fd," M = 
(%d*Z + 63)>>6;\n",lut_numBnInBnGroups[1] ); + + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<2; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + + + + } + + // ===================================================================== + + +fprintf(fd, "// Process group with 3 CNs \n"); + + + + if (lut_numBnInBnGroups[2] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[2] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + for (k=0; k<3; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + } + } + + + + // 
===================================================================== + + +fprintf(fd, "// Process group with 4 CNs \n"); + + + + if (lut_numBnInBnGroups[3] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[3] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + for (k=0; k<4; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + } + } + + + // ===================================================================== + + + fprintf(fd, "// Process group with 5 CNs \n"); + + + + if (lut_numBnInBnGroups[4] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[4] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<5; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes 
[%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 6 CNs \n"); + + // Process group with 6 CNs + + if (lut_numBnInBnGroups[5] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[5] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<6; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 7 CNs \n"); + + // Process group with 7 CNs + + if (lut_numBnInBnGroups[6] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[6] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = 
(__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<7; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 8 CNs \n"); + + // Process group with 8 CNs + + if (lut_numBnInBnGroups[7] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[7] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<8; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + + +fprintf(fd, "// Process group with 9 CNs \n"); + + // Process group with 9 CNs + + if (lut_numBnInBnGroups[8] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + 
// Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[8] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<9; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + +fprintf(fd, "// Process group with 10 CNs \n"); + + // Process group with 10 CNs + + if (lut_numBnInBnGroups[9] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[9] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<10; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + 
i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + +fprintf(fd, "// Process group with 11 CNs \n"); + + if (lut_numBnInBnGroups[10] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[10] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<11; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + // ===================================================================== + + + +fprintf(fd, "// Process group with 12 CNs \n"); + + + if (lut_numBnInBnGroups[11] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[11] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<12; k++) + { + fprintf(fd," p_res = 
&p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + + + +fprintf(fd, "// Process group with 13 CNs \n"); + + + + if (lut_numBnInBnGroups[12] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[12] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<13; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 14 CNs \n"); + + + + if (lut_numBnInBnGroups[13] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[13] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of 
group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<14; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 15 CNs \n"); + + + + if (lut_numBnInBnGroups[14] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[14] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<15; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 16 CNs \n"); + + + + if (lut_numBnInBnGroups[15] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // 
Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[15] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<16; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + // Process group with 17 CNs + +fprintf(fd, "// Process group with 17 CNs \n"); + + // Process group with 17 CNs + + if (lut_numBnInBnGroups[16] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[16] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<17; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 18 CNs \n"); + + // Process group with 8 CNs + + if (lut_numBnInBnGroups[17] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[17] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<18; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + + +fprintf(fd, "// Process group with 19 CNs \n"); + + + + if (lut_numBnInBnGroups[18] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[18] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes 
[%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<19; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 20 CNs \n"); + + + + if (lut_numBnInBnGroups[19] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[19] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<20; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + + + // ===================================================================== + +fprintf(fd, "// Process group with 21 CNs \n"); + + + + + + if (lut_numBnInBnGroups[20] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[20] );; + + // Set the offset to each CN within a 
group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<21; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + // ===================================================================== + + + +fprintf(fd, "// Process group with 22 CNs \n"); + + + if (lut_numBnInBnGroups[21] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[21] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<22; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + + + +fprintf(fd, "// Process group with <23 CNs \n"); + 
+ + + if (lut_numBnInBnGroups[22] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[22] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<23; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 24 CNs \n"); + + // Process group with 4 CNs + + if (lut_numBnInBnGroups[23] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[23] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<24; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + 
fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 25 CNs \n"); + + + + if (lut_numBnInBnGroups[24] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[24] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<25; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 26 CNs \n"); + + + + if (lut_numBnInBnGroups[25] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[25] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) 
&bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<26; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 27 CNs \n"); + + // Process group with 17 CNs + + if (lut_numBnInBnGroups[26] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[26] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<27; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 28 CNs \n"); + + // Process group with 8 CNs + + if (lut_numBnInBnGroups[27] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 
63)>>6;\n",lut_numBnInBnGroups[27] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<28; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + +fprintf(fd, "// Process group with 29 CNs \n"); + + // Process group with 9 CNs + + if (lut_numBnInBnGroups[28] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[28] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<29; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // 
===================================================================== + +fprintf(fd, "// Process group with 30 CNs \n"); + + // Process group with 20 CNs + + if (lut_numBnInBnGroups[29] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[29] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<30; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + fprintf(fd,"}\n"); + fclose(fd); +}//end of the function nrLDPC_bnProc_BG1 + + + + + diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c new file mode 100644 index 0000000000000000000000000000000000000000..ccd01d29a340c83d17a51b00ea40a16f72a9f57c --- /dev/null +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_BG2_avx512.c @@ -0,0 +1,1184 @@ + + +#include <stdint.h> +#include <immintrin.h> +#include "../../nrLDPCdecoder_defs.h" +#include "../../nrLDPC_types.h" + + +void nrLDPC_bnProc_BG2_generator_AVX512(int R) +{ + const char *ratestr[3]={"15","13","23"}; + + if (R<0 || R>2) {printf("Illegal R %d\n",R); abort();} + + + // system("mkdir -p 
../ldpc_gen_files"); + + char fname[50]; + sprintf(fname,"../ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R%s_AVX512.h",ratestr[R]); + FILE *fd=fopen(fname,"w"); + if (fd == NULL) {printf("Cannot create \n");abort();} + + fprintf(fd,"#include <stdint.h>\n"); + fprintf(fd,"#include <immintrin.h>\n"); + + fprintf(fd,"void nrLDPC_bnProc_BG2_R%s_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) {\n",ratestr[R]); + const uint8_t* lut_numBnInBnGroups; + const uint32_t* lut_startAddrBnGroups; + const uint16_t* lut_startAddrBnGroupsLlr; + if (R==0) { + + + lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R15; + lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R15; + lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R15; + + } + else if (R==1){ + + lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R13; + lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R13; + lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R13; + } + else if (R==2) { + + lut_numBnInBnGroups = lut_numBnInBnGroups_BG2_R23; + lut_startAddrBnGroups = lut_startAddrBnGroups_BG2_R23; + lut_startAddrBnGroupsLlr = lut_startAddrBnGroupsLlr_BG2_R23; + } + else { printf("aborting, illegal R %d\n",R); fclose(fd);abort();} + + + //uint32_t M; + //uint32_t M32rem; + // uint32_t i; + uint32_t k; + // Offset to each bit within a group in terms of 32 Byte + uint32_t cnOffsetInGroup; + uint8_t idxBnGroup = 0; + + + + fprintf(fd," __m512i* p_bnProcBuf; \n"); + fprintf(fd," __m512i* p_bnProcBufRes; \n"); + fprintf(fd," __m512i* p_llrRes; \n"); + fprintf(fd," __m512i* p_res; \n"); + fprintf(fd," uint32_t M, i; \n"); + + + +// ===================================================================== + // Process group with 1 CN + // Already done in bnProcBufPc + + // ===================================================================== + + + // ===================================================================== + +fprintf(fd, "// Process group with 2 CNs \n"); + + if (lut_numBnInBnGroups[1] > 0) 
+ { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs or parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[1] ); + + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[1]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<2; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + + + + } + + // ===================================================================== + + +fprintf(fd, "// Process group with 3 CNs \n"); + + + + if (lut_numBnInBnGroups[2] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[2] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[2]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + for (k=0; k<3; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], 
p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 4 CNs \n"); + + + + if (lut_numBnInBnGroups[3] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[3] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[3]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + for (k=0; k<4; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + } + } + + + // ===================================================================== + + + fprintf(fd, "// Process group with 5 CNs \n"); + + + + if (lut_numBnInBnGroups[4] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[4] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[4]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<5; k++) + { + fprintf(fd," p_res = 
&p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 6 CNs \n"); + + // Process group with 6 CNs + + if (lut_numBnInBnGroups[5] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[5] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[5]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<6; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 7 CNs \n"); + + // Process group with 7 CNs + + if (lut_numBnInBnGroups[6] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[6] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = 
(lut_numBnInBnGroups[6]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<7; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 8 CNs \n"); + + // Process group with 8 CNs + + if (lut_numBnInBnGroups[7] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[7] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[7]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<8; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + + +fprintf(fd, "// Process group with 9 CNs \n"); + + // Process group with 9 
CNs + + if (lut_numBnInBnGroups[8] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[8] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[8]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<9; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + +fprintf(fd, "// Process group with 10 CNs \n"); + + // Process group with 10 CNs + + if (lut_numBnInBnGroups[9] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[9] ); + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[9]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<10; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + 
fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + +fprintf(fd, "// Process group with 11 CNs \n"); + + if (lut_numBnInBnGroups[10] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[10] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[10]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<11; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + // ===================================================================== + + + +fprintf(fd, "// Process group with 12 CNs \n"); + + + if (lut_numBnInBnGroups[11] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[11] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[11]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes 
[%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<12; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + + + +fprintf(fd, "// Process group with 13 CNs \n"); + + + + if (lut_numBnInBnGroups[12] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[12] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[12]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<13; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 14 CNs \n"); + + + + if (lut_numBnInBnGroups[13] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[13] );; + + // Set the offset to each CN within a group 
in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[13]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<14; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 15 CNs \n"); + + + + if (lut_numBnInBnGroups[14] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[14] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[14]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<15; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 16 CNs \n"); + + 
+ + if (lut_numBnInBnGroups[15] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[15] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[15]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<16; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + // Process group with 17 CNs + +fprintf(fd, "// Process group with 17 CNs \n"); + + // Process group with 17 CNs + + if (lut_numBnInBnGroups[16] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[16] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[16]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<17; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes 
[%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 18 CNs \n"); + + // Process group with 8 CNs + + if (lut_numBnInBnGroups[17] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[17] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[17]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<18; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + + +fprintf(fd, "// Process group with 19 CNs \n"); + + + + if (lut_numBnInBnGroups[18] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[18] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[18]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf 
[%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<19; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 20 CNs \n"); + + + + if (lut_numBnInBnGroups[19] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[19] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[19]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<20; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + + + // ===================================================================== + +fprintf(fd, "// Process group with 21 CNs \n"); + + + + + + if (lut_numBnInBnGroups[20] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + 
fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[20] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[20]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<21; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + // ===================================================================== + + + +fprintf(fd, "// Process group with 22 CNs \n"); + + + if (lut_numBnInBnGroups[21] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[21] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[21]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<22; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // 
===================================================================== + + + +fprintf(fd, "// Process group with <23 CNs \n"); + + + + if (lut_numBnInBnGroups[22] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[22] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[22]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<23; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 24 CNs \n"); + + // Process group with 4 CNs + + if (lut_numBnInBnGroups[23] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[23] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[23]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<24; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", 
k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 25 CNs \n"); + + + + if (lut_numBnInBnGroups[24] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[24] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[24]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<25; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + + fprintf(fd,"}\n"); + + } + } + + + + // ===================================================================== + + +fprintf(fd, "// Process group with 26 CNs \n"); + + + + if (lut_numBnInBnGroups[25] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[25] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[25]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," 
p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<26; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 27 CNs \n"); + + // Process group with 17 CNs + + if (lut_numBnInBnGroups[26] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[26] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[26]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<27; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + + +fprintf(fd, "// Process group with 28 CNs \n"); + + // Process group with 8 CNs + + if (lut_numBnInBnGroups[27] > 0) + { + // If elements in group move to next 
address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[27] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[27]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<28; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + // ===================================================================== + +fprintf(fd, "// Process group with 29 CNs \n"); + + // Process group with 9 CNs + + if (lut_numBnInBnGroups[28] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[28] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[28]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<29; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + + // ===================================================================== + +fprintf(fd, "// Process group with 30 CNs \n"); + + // Process group with 20 CNs + + if (lut_numBnInBnGroups[29] > 0) + { + // If elements in group move to next address + idxBnGroup++; + + // Number of groups of 32 BNs for parallel processing + fprintf(fd," M = (%d*Z + 63)>>6;\n",lut_numBnInBnGroups[29] );; + + // Set the offset to each CN within a group in terms of 16 Byte + cnOffsetInGroup = (lut_numBnInBnGroups[29]*NR_LDPC_ZMAX)>>6; + + // Set pointers to start of group 2 + fprintf(fd," p_bnProcBuf = (__m512i*) &bnProcBuf [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + fprintf(fd," p_bnProcBufRes = (__m512i*) &bnProcBufRes [%d];\n",lut_startAddrBnGroups[idxBnGroup]); + + // Loop over CNs + for (k=0; k<30; k++) + { + fprintf(fd," p_res = &p_bnProcBufRes[%d];\n", k*cnOffsetInGroup); + fprintf(fd," p_llrRes = (__m512i*) &llrRes [%d];\n",lut_startAddrBnGroupsLlr[idxBnGroup]); + + // Loop over BNs + fprintf(fd," for (i=0;i<M;i++) {\n"); + fprintf(fd," p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[%d + i]);\n",k*cnOffsetInGroup); + + fprintf(fd,"}\n"); + + } + } + + fprintf(fd,"}\n"); + fclose(fd); +}//end of the function nrLDPC_bnProc_BG1 + + + + + + + + diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 new file mode 100755 index 0000000000000000000000000000000000000000..f686a271f18d31892f83a65819d96e3439325fd9 Binary files /dev/null and b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/bnProc_gen_avx512 differ diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/main.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/generator_bnProc_avx512/main.c new file mode 100644 index 
#include <stdio.h>
#include <stdint.h>

/* Number of rate indices handed to each generator. */
#define NB_R  3

/* Generator entry points; they are only declared here and must be
 * provided by the other sources this tool is linked with. */
void nrLDPC_bnProc_BG1_generator_AVX512(int);
void nrLDPC_bnProc_BG2_generator_AVX512(int);
/* The bnProcPc generators are currently not invoked by this tool:
 *   void nrLDPC_bnProcPc_BG1_generator_AVX2(int);
 *   void nrLDPC_bnProcPc_BG2_generator_AVX2(int);
 */

/*
 * Driver for the AVX512 bnProc code generators.
 *
 * Walks the rate indices 0..NB_R-1 in ascending order and, for each one,
 * runs the BG1 generator followed by the BG2 generator.
 * NOTE(review): each index presumably selects one code rate per base graph
 * (three generated files exist per base graph) -- confirm against the
 * generator sources.
 *
 * Always returns 0; the generators return void, so a failure inside them
 * cannot be reported from here.
 */
int main(void)
{
  static const int rate_indices[NB_R] = {0, 1, 2};
  const int *cursor = rate_indices;
  const int *const end = rate_indices + NB_R;

  while (cursor != end) {
    const int rate_index = *cursor++;

    /* Keep the BG1-then-BG2 order for every index. */
    nrLDPC_bnProc_BG1_generator_AVX512(rate_index);
    nrLDPC_bnProc_BG2_generator_AVX512(rate_index);
  }

  return 0;
}
a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R13_AVX2.c +++ /dev/null @@ -1,440 +0,0 @@ -#include <stdint.h> -#include <immintrin.h> -void nrLDPC_bnProc_BG2_R13_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m256i* p_bnProcBuf; - __m256i* p_bnProcBufRes; - __m256i* p_llrRes; - __m256i* p_res; - uint32_t M, i; -// Process group with 2 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [6912]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [6912]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [6912]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [6912]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} -// Process group with 3 CNs -// Process group with 4 CNs - M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [7680]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [7680]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [7296]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [7296]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [7296]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [7296]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} -// Process group with 5 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [10752]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [10752]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [8064]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = 
&p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [8064]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [8064]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [8064]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [8064]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} -// Process group with 6 CNs - M = (5*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [12672]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [12672]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [8448]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [8448]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [8448]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [8448]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); -} - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [8448]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); -} - p_res = &p_bnProcBufRes[300]; - p_llrRes = (__m256i*) &llrRes [8448]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[300 + i]); -} -// Process group with 7 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [24192]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [24192]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [10368]; - for (i=0;i<M;i++) { 
- p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [10368]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [10368]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [10368]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [10368]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [10368]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [10368]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} -// Process group with 8 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [26880]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [26880]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [10752]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [10752]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [10752]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [10752]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [10752]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - 
p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [10752]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [10752]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [10752]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} -// Process group with 9 CNs -// Process group with 10 CNs -// Process group with 11 CNs -// Process group with 12 CNs -// Process group with 13 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [29952]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [29952]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = 
_mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [11136]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} -// Process group with 14 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [34944]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [34944]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - 
p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [11520]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); -} -// Process group with 15 CNs -// Process group with 16 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [40320]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [40320]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; 
- p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); -} - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [11904]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); -} - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [11904]; - 
for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); -} -// Process group with 17 CNs -// Process group with 18 CNs -// Process group with 19 CNs -// Process group with 20 CNs -// Process group with 21 CNs -// Process group with 22 CNs -// Process group with <23 CNs -// Process group with 24 CNs -// Process group with 25 CNs -// Process group with 26 CNs -// Process group with 27 CNs -// Process group with 28 CNs -// Process group with 29 CNs -// Process group with 30 CNs -} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c deleted file mode 100644 index f649102df7a8cde518242a39292f8180778425e4..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R15_AVX2.c +++ /dev/null @@ -1,799 +0,0 @@ -#include <stdint.h> -#include <immintrin.h> -void nrLDPC_bnProc_BG2_R15_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m256i* p_bnProcBuf; - __m256i* p_bnProcBufRes; - __m256i* p_llrRes; - __m256i* p_res; - uint32_t M, i; -// Process group with 2 CNs -// Process group with 3 CNs -// Process group with 4 CNs -// Process group with 5 CNs - M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [14592]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [14592]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [14592]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [14592]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [14592]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = 
(__m256i*) &llrRes [14592]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [14592]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} -// Process group with 6 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [18432]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [18432]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [15360]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [15360]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [15360]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [15360]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [15360]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [15360]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} -// Process group with 7 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [20736]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [20736]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [15744]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [15744]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [15744]; - for (i=0;i<M;i++) { - p_res[i] = 
_mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [15744]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [15744]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [15744]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [15744]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} -// Process group with 8 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [23424]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [23424]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [16128]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [16128]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [16128]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [16128]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [16128]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [16128]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [16128]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = 
&p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [16128]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} -// Process group with 9 CNs - M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [26496]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [26496]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); -} - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [16512]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); -} -// Process group with 10 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [33408]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [33408]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [17280]; - for 
(i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [17280]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} -// Process group with 11 CNs -// Process group with 12 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [37248]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [37248]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = 
_mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [17664]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} -// Process group with 13 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [41856]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [41856]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res 
= &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [18048]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} -// Process group with 14 CNs - M = (1*Z + 31)>>5; - 
p_bnProcBuf = (__m256i*) &bnProcBuf [46848]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [46848]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = 
_mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [18432]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); -} -// Process group with 15 CNs -// Process group with 16 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [52224]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [52224]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], 
p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); -} - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); -} - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [18816]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); -} -// Process group with 17 CNs -// Process group with 18 CNs -// Process group with 19 CNs -// Process group with 20 CNs -// Process group with 21 CNs -// Process group with 22 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [58368]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [58368]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes 
= (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); -} - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); -} - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [19200]; - for 
(i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); -} - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); -} - p_res = &p_bnProcBufRes[204]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]); -} - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); -} - p_res = &p_bnProcBufRes[228]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]); -} - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); -} - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [19200]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); -} -// Process group with <23 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [66816]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [66816]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = 
_mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[84]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} - p_res = &p_bnProcBufRes[132]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); -} - p_res = &p_bnProcBufRes[144]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); -} - p_res = &p_bnProcBufRes[156]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); -} - p_res = &p_bnProcBufRes[168]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); -} - p_res = &p_bnProcBufRes[180]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); -} - p_res = &p_bnProcBufRes[192]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], 
p_bnProcBuf[192 + i]); -} - p_res = &p_bnProcBufRes[204]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[204 + i]); -} - p_res = &p_bnProcBufRes[216]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); -} - p_res = &p_bnProcBufRes[228]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[228 + i]); -} - p_res = &p_bnProcBufRes[240]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); -} - p_res = &p_bnProcBufRes[252]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); -} - p_res = &p_bnProcBufRes[264]; - p_llrRes = (__m256i*) &llrRes [19584]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]); -} -// Process group with 24 CNs -// Process group with 25 CNs -// Process group with 26 CNs -// Process group with 27 CNs -// Process group with 28 CNs -// Process group with 29 CNs -// Process group with 30 CNs -} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c deleted file mode 100644 index 59a1613099c8da476887c855153e5a4b2ebdd575..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc/nrLDPC_bnProc_BG2_R23_AVX2.c +++ /dev/null @@ -1,153 +0,0 @@ -#include <stdint.h> -#include <immintrin.h> -void nrLDPC_bnProc_BG2_R23_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { - __m256i* p_bnProcBuf; - __m256i* p_bnProcBufRes; - __m256i* p_llrRes; - __m256i* p_res; - uint32_t M, i; -// Process group with 2 CNs - M = (3*Z + 31)>>5; - 
p_bnProcBuf = (__m256i*) &bnProcBuf [1152]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [1152]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [1152]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [1152]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} -// Process group with 3 CNs - M = (5*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [3456]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [3456]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [2304]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [2304]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} - p_res = &p_bnProcBufRes[120]; - p_llrRes = (__m256i*) &llrRes [2304]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); -} -// Process group with 4 CNs - M = (3*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [9216]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [9216]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [4224]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [4224]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [4224]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[108]; - p_llrRes = (__m256i*) &llrRes [4224]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); -} -// Process group with 5 CNs - M = (2*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [13824]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [13824]; - p_res = 
&p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [5376]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [5376]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [5376]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[72]; - p_llrRes = (__m256i*) &llrRes [5376]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); -} - p_res = &p_bnProcBufRes[96]; - p_llrRes = (__m256i*) &llrRes [5376]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); -} -// Process group with 6 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m256i*) &bnProcBuf [17664]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [17664]; - p_res = &p_bnProcBufRes[0]; - p_llrRes = (__m256i*) &llrRes [6144]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); -} - p_res = &p_bnProcBufRes[12]; - p_llrRes = (__m256i*) &llrRes [6144]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); -} - p_res = &p_bnProcBufRes[24]; - p_llrRes = (__m256i*) &llrRes [6144]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); -} - p_res = &p_bnProcBufRes[36]; - p_llrRes = (__m256i*) &llrRes [6144]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); -} - p_res = &p_bnProcBufRes[48]; - p_llrRes = (__m256i*) &llrRes [6144]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); -} - p_res = &p_bnProcBufRes[60]; - p_llrRes = (__m256i*) &llrRes [6144]; - for (i=0;i<M;i++) { - p_res[i] = _mm256_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); -} -// Process group with 7 CNs -// Process group with 8 CNs -// Process group with 9 CNs -// 
Process group with 10 CNs -// Process group with 11 CNs -// Process group with 12 CNs -// Process group with 13 CNs -// Process group with 14 CNs -// Process group with 15 CNs -// Process group with 16 CNs -// Process group with 17 CNs -// Process group with 18 CNs -// Process group with 19 CNs -// Process group with 20 CNs -// Process group with 21 CNs -// Process group with 22 CNs -// Process group with <23 CNs -// Process group with 24 CNs -// Process group with 25 CNs -// Process group with 26 CNs -// Process group with 27 CNs -// Process group with 28 CNs -// Process group with 29 CNs -// Process group with 30 CNs -} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R89_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R89_AVX2.h deleted file mode 100644 index c01c203115310d02bc731165705702b3e94deb7a..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProcPc/nrLDPC_bnProcPc_BG2_R89_AVX2.h +++ /dev/null @@ -1,192 +0,0 @@ -static inline void nrLDPC_bnProcPc_BG2_R89_AVX2(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes , int8_t* llrProcBuf, uint16_t Z ) { - __m256i ymm0, ymm1, ymmRes0, ymmRes1; - __m128i* p_bnProcBuf; - __m256i* p_bnProcBufRes; - __m128i* p_llrProcBuf; - __m256i* p_llrProcBuf256; - __m256i* p_llrRes; - uint32_t M ; -// Process group with 1 CNs - M = (0*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [0]; - p_bnProcBufRes = (__m256i*) &bnProcBufRes [0]; - p_llrProcBuf = (__m128i*) &llrProcBuf [0]; - p_llrProcBuf256 = (__m256i*) &llrProcBuf [0]; - p_llrRes = (__m256i*) &llrRes [0]; - for (int i=0,j=0;i<M;i++,j+=2) { - p_bnProcBufRes[i] = p_llrProcBuf256[i]; - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymmRes0 = _mm256_adds_epi16(ymm0, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j+1]); - ymm1 = 
_mm256_cvtepi8_epi16(p_llrProcBuf[j+1]); - ymmRes1 = _mm256_adds_epi16(ymm0, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); - p_llrRes++; -} -// Process group with 2 CNs - M = (3*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [1152]; - p_llrProcBuf = (__m128i*) &llrProcBuf [1152]; - p_llrRes = (__m256i*) &llrRes [1152]; - for (int i=0,j=0;i<M;i++,j+=2) { - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); - p_llrRes++; -} -// Process group with 3 CNs - M = (5*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [3456]; - p_llrProcBuf = (__m128i*) &llrProcBuf [2304]; - p_llrRes = (__m256i*) &llrRes [2304]; - for (int i=0,j=0;i<M;i++,j+=2) { - ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[120 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[120 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[240 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[240 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - *p_llrRes 
= _mm256_permute4x64_epi64(ymm0, 0xD8); - p_llrRes++; -} -// Process group with 4 CNs - M = (3*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [9216]; - p_llrProcBuf = (__m128i*) &llrProcBuf [4224]; - p_llrRes = (__m256i*) &llrRes [4224]; - for (int i=0,j=0;i<M;i++,j+=2) { - ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[144 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[144 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[216 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[216 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); - p_llrRes++; -} -// Process group with 5 CNs - M = (2*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [13824]; - p_llrProcBuf = (__m128i*) &llrProcBuf [5376]; - p_llrRes = (__m256i*) &llrRes [5376]; - for (int i=0,j=0;i<M;i++,j+=2) { - ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[48 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[48 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[96 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[96 + j +1]); - ymmRes1 = 
_mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[144 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[144 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[192 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[192 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); - p_llrRes++; -} -// Process group with 6 CNs - M = (1*Z + 31)>>5; - p_bnProcBuf = (__m128i*) &bnProcBuf [17664]; - p_llrProcBuf = (__m128i*) &llrProcBuf [6144]; - p_llrRes = (__m256i*) &llrRes [6144]; - for (int i=0,j=0;i<M;i++,j+=2) { - ymmRes0 = _mm256_cvtepi8_epi16(p_bnProcBuf [j]); - ymmRes1 = _mm256_cvtepi8_epi16(p_bnProcBuf [j +1]); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[24 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[24 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[48 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[48 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[72 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[96 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_bnProcBuf[96 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_bnProcBuf[120 + j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = 
_mm256_cvtepi8_epi16(p_bnProcBuf[120 + j +1]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_cvtepi8_epi16(p_llrProcBuf[j]); - ymmRes0 = _mm256_adds_epi16(ymmRes0, ymm0); - ymm1 = _mm256_cvtepi8_epi16(p_llrProcBuf[j +1 ]); - ymmRes1 = _mm256_adds_epi16(ymmRes1, ymm1); - ymm0 = _mm256_packs_epi16(ymmRes0, ymmRes1); - *p_llrRes = _mm256_permute4x64_epi64(ymm0, 0xD8); - p_llrRes++; -} -// Process group with 7 CNs -// Process group with 8 CNs -// Process group with 9 CNs -// Process group with 10 CNs -// Process group with 11 CNs -// Process group with 12 CNs -// Process group with 13 CNs -// Process group with 14 CNs -// Process group with 15 CNs -// Process group with 16 CNs -// Process group with 17 CNs -// Process group with 18 CNs -// Process group with 19 CNs -// Process group with 20 CNs -// Process group with 21 CNs -// Process group with 22 CNs -// Process group with 23 CNs -// Process group with 24 CNs -// Process group with 25 CNs -// Process group with 26 CNs -// Process group with 27 CNs -// Process group with 28 CNs -// Process group with 29 CNs -// Process group with 30 CNs -} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h new file mode 100644 index 0000000000000000000000000000000000000000..d9738fd57aa6a6fd9268ddaed68cf45d43ad5fca --- /dev/null +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R13_AVX512.h @@ -0,0 +1,787 @@ +static inline void nrLDPC_bnProc_BG1_R13_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { + __m512i* p_bnProcBuf; + __m512i* p_bnProcBufRes; + __m512i* p_llrRes; + __m512i* p_res; + uint32_t M, i; +// Process group with 2 CNs +// Process group with 3 CNs +// Process group with 4 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [16128]; + 
p_bnProcBufRes = (__m512i*) &bnProcBufRes [16128]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} +// Process group with 5 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [17664]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [17664]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} +// Process group with 6 CNs + M = (2*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [19584]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [19584]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [16896]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[12]; + 
p_llrRes = (__m512i*) &llrRes [16896]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [16896]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [16896]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [16896]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [16896]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} +// Process group with 7 CNs + M = (4*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [24192]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [24192]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} + p_res = &p_bnProcBufRes[120]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); +} + p_res = &p_bnProcBufRes[144]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + 
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); +} +// Process group with 8 CNs + M = (3*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [34944]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [34944]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); +} + p_res = &p_bnProcBufRes[108]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); +} + p_res = &p_bnProcBufRes[126]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); +} +// Process group with 9 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [44160]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [44160]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = 
&p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [20352]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} +// Process group with 10 CNs + M = (4*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [47616]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [47616]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [20736]; + for 
(i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} + p_res = &p_bnProcBufRes[120]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); +} + p_res = &p_bnProcBufRes[144]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); +} + p_res = &p_bnProcBufRes[168]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); +} + p_res = &p_bnProcBufRes[192]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); +} + p_res = &p_bnProcBufRes[216]; + p_llrRes = (__m512i*) &llrRes [20736]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); +} +// Process group with 11 CNs + M = (3*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [62976]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [62976]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], 
p_bnProcBuf[90 + i]); +} + p_res = &p_bnProcBufRes[108]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); +} + p_res = &p_bnProcBufRes[126]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); +} + p_res = &p_bnProcBufRes[144]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); +} + p_res = &p_bnProcBufRes[162]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]); +} + p_res = &p_bnProcBufRes[180]; + p_llrRes = (__m512i*) &llrRes [22272]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[180 + i]); +} +// Process group with 12 CNs + M = (4*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [75648]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [75648]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} + p_res = &p_bnProcBufRes[120]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); +} + p_res = &p_bnProcBufRes[144]; 
+ p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); +} + p_res = &p_bnProcBufRes[168]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); +} + p_res = &p_bnProcBufRes[192]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[192 + i]); +} + p_res = &p_bnProcBufRes[216]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[216 + i]); +} + p_res = &p_bnProcBufRes[240]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); +} + p_res = &p_bnProcBufRes[264]; + p_llrRes = (__m512i*) &llrRes [23424]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[264 + i]); +} +// Process group with 13 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [94080]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [94080]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [24960]; + for 
(i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [24960]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} +// Process group with 14 CNs +// Process group with 15 CNs +// Process group with 16 CNs +// Process group with 17 CNs +// Process group with 18 CNs +// Process group with 19 CNs +// Process group with 20 CNs +// Process group with 21 CNs +// Process group with 22 CNs +// Process group with 23 CNs +// Process group with 24 CNs +// Process group with 25 CNs +// Process group with 26 CNs +// Process group with 27 CNs +// Process group with 28 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [99072]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [99072]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes =
(__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[78]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + 
p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); +} + p_res = &p_bnProcBufRes[84]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} + p_res = &p_bnProcBufRes[102]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]); +} + p_res = &p_bnProcBufRes[108]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); +} + p_res = &p_bnProcBufRes[114]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]); +} + p_res = &p_bnProcBufRes[120]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); +} + p_res = &p_bnProcBufRes[126]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); +} + p_res = &p_bnProcBufRes[132]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); +} + p_res = &p_bnProcBufRes[138]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[138 + i]); +} + p_res = &p_bnProcBufRes[144]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); +} + p_res = &p_bnProcBufRes[150]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]); +} + p_res = &p_bnProcBufRes[156]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); +} + p_res = &p_bnProcBufRes[162]; + p_llrRes = (__m512i*) &llrRes [25344]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]); +} +// Process group with 29 CNs +// Process group with 30 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [109824]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [109824]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], 
p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[78]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); +} + p_res = &p_bnProcBufRes[84]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} + p_res = &p_bnProcBufRes[102]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]); +} + p_res = &p_bnProcBufRes[108]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); +} + p_res = &p_bnProcBufRes[114]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]); +} + p_res = &p_bnProcBufRes[120]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); +} + p_res = 
&p_bnProcBufRes[126]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); +} + p_res = &p_bnProcBufRes[132]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[132 + i]); +} + p_res = &p_bnProcBufRes[138]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[138 + i]); +} + p_res = &p_bnProcBufRes[144]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); +} + p_res = &p_bnProcBufRes[150]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]); +} + p_res = &p_bnProcBufRes[156]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[156 + i]); +} + p_res = &p_bnProcBufRes[162]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[162 + i]); +} + p_res = &p_bnProcBufRes[168]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); +} + p_res = &p_bnProcBufRes[174]; + p_llrRes = (__m512i*) &llrRes [25728]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[174 + i]); +} +} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h new file mode 100644 index 0000000000000000000000000000000000000000..bb97cb33475f4d7d4e4466f050fd1c1d4dc0004c --- /dev/null +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R23_AVX512.h @@ -0,0 +1,272 @@ +static inline void 
nrLDPC_bnProc_BG1_R23_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { + __m512i* p_bnProcBuf; + __m512i* p_bnProcBufRes; + __m512i* p_llrRes; + __m512i* p_res; + uint32_t M, i; +// Process group with 2 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [3456]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [3456]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [3456]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [3456]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} +// Process group with 3 CNs + M = (5*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [4224]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [4224]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [3840]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [3840]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [3840]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} +// Process group with 4 CNs + M = (3*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [9984]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [9984]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [5760]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [5760]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [5760]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes 
[5760]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} +// Process group with 5 CNs + M = (7*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [14592]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [14592]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [6912]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [6912]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[84]; + p_llrRes = (__m512i*) &llrRes [6912]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); +} + p_res = &p_bnProcBufRes[126]; + p_llrRes = (__m512i*) &llrRes [6912]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); +} + p_res = &p_bnProcBufRes[168]; + p_llrRes = (__m512i*) &llrRes [6912]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[168 + i]); +} +// Process group with 6 CNs + M = (8*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [28032]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [28032]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} + p_res = &p_bnProcBufRes[144]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[144 + i]); +} + p_res = &p_bnProcBufRes[192]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], 
p_bnProcBuf[192 + i]); +} + p_res = &p_bnProcBufRes[240]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[240 + i]); +} +// Process group with 7 CNs +// Process group with 8 CNs +// Process group with 9 CNs +// Process group with 10 CNs +// Process group with 11 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [46464]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [46464]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [12672]; + for 
(i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [12672]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} +// Process group with 12 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [50688]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [50688]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], 
p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [13056]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} +// Process group with 13 CNs +// Process group with 14 CNs +// Process group with 15 CNs +// Process group with 16 CNs +// Process group with 17 CNs +// Process group with 18 CNs +// Process group with 19 CNs +// Process group with 20 CNs +// Process group with 21 CNs +// Process group with 22 CNs +// Process group with <23 CNs +// Process group with 24 CNs +// Process group with 25 CNs +// Process group with 26 CNs +// Process group with 27 CNs +// Process group with 28 CNs +// Process group with 29 CNs +// Process group with 30 CNs +} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h new file mode 100644 index 0000000000000000000000000000000000000000..96006f76cfe22515cfca6b7a51369a3b5a71fbc4 --- /dev/null +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG1_R89_AVX512.h @@ -0,0 +1,118 @@ +static inline void nrLDPC_bnProc_BG1_R89_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { + __m512i* p_bnProcBuf; + __m512i* p_bnProcBufRes; + __m512i* p_llrRes; + __m512i* p_res; + uint32_t M, i; +// Process group with 2 CNs + M = (3*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [384]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [384]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [384]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [384]; + for 
(i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} +// Process group with 3 CNs + M = (21*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [2688]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [2688]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [1536]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[126]; + p_llrRes = (__m512i*) &llrRes [1536]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); +} + p_res = &p_bnProcBufRes[252]; + p_llrRes = (__m512i*) &llrRes [1536]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[252 + i]); +} +// Process group with 4 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [26880]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [26880]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [9600]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} +// Process group with 5 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [28416]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [28416]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [9984]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [9984]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = 
&p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [9984]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [9984]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [9984]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} +// Process group with 6 CNs +// Process group with 7 CNs +// Process group with 8 CNs +// Process group with 9 CNs +// Process group with 10 CNs +// Process group with 11 CNs +// Process group with 12 CNs +// Process group with 13 CNs +// Process group with 14 CNs +// Process group with 15 CNs +// Process group with 16 CNs +// Process group with 17 CNs +// Process group with 18 CNs +// Process group with 19 CNs +// Process group with 20 CNs +// Process group with 21 CNs +// Process group with 22 CNs +// Process group with <23 CNs +// Process group with 24 CNs +// Process group with 25 CNs +// Process group with 26 CNs +// Process group with 27 CNs +// Process group with 28 CNs +// Process group with 29 CNs +// Process group with 30 CNs +} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h new file mode 100644 index 0000000000000000000000000000000000000000..b3a701584b204ca44c254acfef6999aea56d89ef --- /dev/null +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R13_AVX512.h @@ -0,0 +1,440 @@ +#include <stdint.h> +#include <immintrin.h> +void nrLDPC_bnProc_BG2_R13_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { + __m512i* p_bnProcBuf; + __m512i* p_bnProcBufRes; + __m512i* p_llrRes; + __m512i* p_res; + uint32_t M, i; +// Process group with 2 CNs + 
M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [6912]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [6912]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [6912]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [6912]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} +// Process group with 3 CNs +// Process group with 4 CNs + M = (2*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [7680]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [7680]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [7296]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [7296]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [7296]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [7296]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} +// Process group with 5 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [10752]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [10752]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [8064]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [8064]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [8064]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [8064]; + for (i=0;i<M;i++) { + p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [8064]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} +// Process group with 6 CNs + M = (5*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [12672]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [12672]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [8448]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [8448]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [8448]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [8448]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); +} + p_res = &p_bnProcBufRes[120]; + p_llrRes = (__m512i*) &llrRes [8448]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); +} + p_res = &p_bnProcBufRes[150]; + p_llrRes = (__m512i*) &llrRes [8448]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[150 + i]); +} +// Process group with 7 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [24192]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [24192]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [10368]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [10368]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [10368]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + 
p_llrRes = (__m512i*) &llrRes [10368]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [10368]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [10368]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [10368]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} +// Process group with 8 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [26880]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [26880]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [10752]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [10752]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [10752]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [10752]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [10752]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [10752]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [10752]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [10752]; + for (i=0;i<M;i++) { + p_res[i] 
= _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} +// Process group with 9 CNs +// Process group with 10 CNs +// Process group with 11 CNs +// Process group with 12 CNs +// Process group with 13 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [29952]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [29952]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) 
&llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [11136]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} +// Process group with 14 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [34944]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [34944]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[78]; + p_llrRes = (__m512i*) &llrRes [11520]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); +} +// Process group with 15 CNs +// Process group with 16 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [40320]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [40320]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], 
p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[78]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); +} + p_res = &p_bnProcBufRes[84]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [11904]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); +} +// Process group with 17 CNs +// Process group with 18 CNs +// Process group with 19 CNs +// Process group with 20 CNs +// Process group with 21 CNs +// Process group with 22 CNs +// Process group with <23 CNs +// Process group with 24 CNs +// Process group with 25 CNs +// Process group with 26 CNs +// Process group with 27 CNs +// Process group with 
28 CNs +// Process group with 29 CNs +// Process group with 30 CNs +} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h new file mode 100644 index 0000000000000000000000000000000000000000..4c56fca2f3d8c5234b510dba5dd0fd6886606d62 --- /dev/null +++ b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/bnProc_avx512/nrLDPC_bnProc_BG2_R15_AVX512.h @@ -0,0 +1,799 @@ +#include <stdint.h> +#include <immintrin.h> +void nrLDPC_bnProc_BG2_R15_AVX512(int8_t* bnProcBuf,int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z ) { + __m512i* p_bnProcBuf; + __m512i* p_bnProcBufRes; + __m512i* p_llrRes; + __m512i* p_res; + uint32_t M, i; +// Process group with 2 CNs +// Process group with 3 CNs +// Process group with 4 CNs +// Process group with 5 CNs + M = (2*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [14592]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [14592]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [14592]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [14592]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [14592]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [14592]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [14592]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} +// Process group with 6 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [18432]; + p_bnProcBufRes = (__m512i*) 
&bnProcBufRes [18432]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [15360]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [15360]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [15360]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [15360]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [15360]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [15360]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} +// Process group with 7 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [20736]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [20736]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [15744]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [15744]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [15744]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [15744]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [15744]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = 
(__m512i*) &llrRes [15744]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [15744]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} +// Process group with 8 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [23424]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [23424]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [16128]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} +// Process group with 9 CNs + M = (2*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [26496]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [26496]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[84]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [16512]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} +// Process group with 10 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [33408]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [33408]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = 
&p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [17280]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} +// Process group with 11 CNs +// Process group with 12 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [37248]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [37248]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = 
(__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [17664]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} +// Process group with 13 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [41856]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [41856]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [18048]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} +// Process group with 14 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [46848]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [46848]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = 
&p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[78]; + p_llrRes = (__m512i*) &llrRes [18432]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); +} +// Process group with 15 CNs +// Process group with 16 CNs + M = 
(1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [52224]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [52224]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[78]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); +} + p_res = &p_bnProcBufRes[84]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [18816]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); +} +// Process group with 17 CNs +// Process group with 18 CNs +// Process group with 19 CNs +// Process group with 20 CNs +// Process group with 21 CNs +// Process group with 22 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [58368]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [58368]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = 
&p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[78]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); +} + p_res = &p_bnProcBufRes[84]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} + p_res = &p_bnProcBufRes[102]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]); +} + p_res = &p_bnProcBufRes[108]; + p_llrRes = (__m512i*) &llrRes 
[19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); +} + p_res = &p_bnProcBufRes[114]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + i]); +} + p_res = &p_bnProcBufRes[120]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[120 + i]); +} + p_res = &p_bnProcBufRes[126]; + p_llrRes = (__m512i*) &llrRes [19200]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[126 + i]); +} +// Process group with <23 CNs + M = (1*Z + 63)>>6; + p_bnProcBuf = (__m512i*) &bnProcBuf [66816]; + p_bnProcBufRes = (__m512i*) &bnProcBufRes [66816]; + p_res = &p_bnProcBufRes[0]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[0 + i]); +} + p_res = &p_bnProcBufRes[6]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[6 + i]); +} + p_res = &p_bnProcBufRes[12]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[12 + i]); +} + p_res = &p_bnProcBufRes[18]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[18 + i]); +} + p_res = &p_bnProcBufRes[24]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[24 + i]); +} + p_res = &p_bnProcBufRes[30]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[30 + i]); +} + p_res = &p_bnProcBufRes[36]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[36 + i]); +} + p_res = &p_bnProcBufRes[42]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = 
_mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[42 + i]); +} + p_res = &p_bnProcBufRes[48]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[48 + i]); +} + p_res = &p_bnProcBufRes[54]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[54 + i]); +} + p_res = &p_bnProcBufRes[60]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[60 + i]); +} + p_res = &p_bnProcBufRes[66]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[66 + i]); +} + p_res = &p_bnProcBufRes[72]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[72 + i]); +} + p_res = &p_bnProcBufRes[78]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[78 + i]); +} + p_res = &p_bnProcBufRes[84]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[84 + i]); +} + p_res = &p_bnProcBufRes[90]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[90 + i]); +} + p_res = &p_bnProcBufRes[96]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[96 + i]); +} + p_res = &p_bnProcBufRes[102]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[102 + i]); +} + p_res = &p_bnProcBufRes[108]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[108 + i]); +} + p_res = &p_bnProcBufRes[114]; + p_llrRes = (__m512i*) &llrRes [19584]; + for (i=0;i<M;i++) { + p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_bnProcBuf[114 + 
#include <stdint.h>
#include <immintrin.h>

/**
 * BN processing kernel for base graph 2, rate 2/3 (AVX-512 variant).
 *
 * For every BN group listed in the table below (the groups connected to
 * 2, 3, 4, 5 and 6 CNs) and for every CN slot of that group, computes
 *
 *     bnProcBufRes[slot] = llrRes[group] - bnProcBuf[slot]
 *
 * as a signed 8-bit saturating subtraction, 64 bytes (one __m512i) at a time.
 * The same llrRes segment is reused for every CN slot of a group. No other
 * group is touched by this function.
 *
 * @param bnProcBuf     input buffer, read at the byte offsets given in the table
 * @param bnProcBufRes  output buffer, written at the same byte offsets that are
 *                      read from bnProcBuf
 * @param llrRes        input buffer, one segment per group (see table)
 * @param Z             size parameter (presumably the LDPC lifting size - TODO
 *                      confirm); each slot processes ceil(span * Z / 64) vectors
 *
 * All three buffers are dereferenced through __m512i pointers: they must be
 * 64-byte aligned, and because the vector count is rounded up, each slot may
 * be read/written up to 63 bytes past span * Z.
 *
 * The function is `static inline` because it is defined in a header: with
 * external linkage, including this header from more than one translation unit
 * would produce duplicate-symbol link errors. The target attribute lets the
 * header compile even when the including file is not built with -mavx512bw;
 * callers must still only invoke it on CPUs that support AVX-512BW.
 */
static inline __attribute__((target("avx512bw")))
void nrLDPC_bnProc_BG2_R23_AVX512(int8_t* bnProcBuf, int8_t* bnProcBufRes, int8_t* llrRes, uint16_t Z)
{
  // One row per non-empty BN group.
  //   numCn     : number of CN slots in the group
  //   span      : multiplier of Z that gives the bytes processed per slot
  //               (presumably the number of BNs in the group - TODO confirm)
  //   bufOffset : byte offset of the group in bnProcBuf / bnProcBufRes
  //   llrOffset : byte offset of the group in llrRes
  static const struct {
    uint16_t numCn;
    uint16_t span;
    uint16_t bufOffset;
    uint16_t llrOffset;
  } groups[] = {
    {2, 3,  1152, 1152},
    {3, 5,  3456, 2304},
    {4, 3,  9216, 4224},
    {5, 2, 13824, 5376},
    {6, 1, 17664, 6144},
  };

  // Distance between consecutive CN slots is 6 vectors (384 bytes) per unit of
  // span, independent of Z (384 presumably being the largest supported Z).
  enum { VEC_PER_SPAN_UNIT = 6 };

  const uint32_t numGroups = (uint32_t)(sizeof(groups) / sizeof(groups[0]));

  for (uint32_t g = 0; g < numGroups; g++) {
    // Number of 64-byte vectors per slot, rounded up.
    const uint32_t M = (groups[g].span * Z + 63) >> 6;
    const uint32_t slotStride = (uint32_t)groups[g].span * VEC_PER_SPAN_UNIT;

    const __m512i* p_llrRes      = (const __m512i*)&llrRes[groups[g].llrOffset];
    const __m512i* p_bnProcBuf   = (const __m512i*)&bnProcBuf[groups[g].bufOffset];
    __m512i*       p_bnProcBufRes = (__m512i*)&bnProcBufRes[groups[g].bufOffset];

    for (uint32_t cn = 0; cn < groups[g].numCn; cn++) {
      const __m512i* p_in  = &p_bnProcBuf[cn * slotStride];
      __m512i*       p_res = &p_bnProcBufRes[cn * slotStride];

      for (uint32_t i = 0; i < M; i++) {
        p_res[i] = _mm512_subs_epi8(p_llrRes[i], p_in[i]);
      }
    }
  }
}
ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[12+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[1+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[25+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[13+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[0+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[12+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[24+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[1+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[13+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[25+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 4 BNs - M = (5*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[96+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[156+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[36+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[36+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[156+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - min 
= _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[96+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[36+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[96+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[156+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[36+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[96+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[156+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[216+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 5 BNs - M = (18*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[492+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[708+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[924+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1140+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[276+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[276+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[708+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[924+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1140+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[492+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[276+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[492+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[924+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1140+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[708+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[276+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[492+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[708+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1140+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[924+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[276+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[492+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, 
ymm0); - ymm0 = ((__m256i*)cnProcBuf)[708+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[924+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1140+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 6 BNs -M = (8*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1452+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1548+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1644+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1740+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1836+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1356+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1356+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1548+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1644+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1740+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1836+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1452+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = 
((__m256i*)cnProcBuf)[1356+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1452+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1644+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1740+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1836+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1548+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1356+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1452+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1548+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1740+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1836+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1644+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1356+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1452+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1548+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1644+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1836+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1740+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1356+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1452+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1548+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1644+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1740+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1836+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 7 BNs -M = (5*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1932+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - 
ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1992+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2052+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2112+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2172+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2232+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2292+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 8 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2352+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2376+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2400+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2424+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - 
sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2448+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2472+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2496+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2520+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 9 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2544+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2568+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - 
ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2592+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2616+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2640+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2664+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2688+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - 
sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2712+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, 
maxLLR); - ((__m256i*)cnProcBufRes)[2736+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 10 BNs - M = (1*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2760+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2772+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2784+i] = _mm256_sign_epi8(min, sgn); - } - for 
(int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2796+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2808+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2820+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2832+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2844+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2856+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2868+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 19 BNs - M = (4*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2880+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2928+i] = _mm256_sign_epi8(min, sgn); - } - for (int 
i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2976+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3024+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3072+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3120+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3168+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3216+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3264+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3312+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3360+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3408+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3456+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3504+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - 
((__m256i*)cnProcBufRes)[3552+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3600+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3648+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3696+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3744+i] = _mm256_sign_epi8(min, sgn); - } -} diff --git 
a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG1_R23_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG1_R23_AVX2.c deleted file mode 100644 index 4c667e1b9843fd0e7095629ad3794a9b1d4b6029..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG1_R23_AVX2.c +++ /dev/null @@ -1,2097 +0,0 @@ -#include <stdint.h> -#include <immintrin.h> -static inline void nrLDPC_cnProc_BG1_R23_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) { -//Process group with 3 BNs - __m256i ymm0, min, sgn,ones,maxLLR; - ones = _mm256_set1_epi8((char)1); - maxLLR = _mm256_set1_epi8((char)127); - uint32_t M; - int8_t* cnProcBuf = p_procBuf->cnProcBuf; - int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; - M = (1*Z + 31)>>5; - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[12+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[24+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[0+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[13+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[25+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[0+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[24+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[12+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[1+i]; - sgn = 
_mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[25+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[13+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[0+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[12+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[24+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[1+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[13+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[25+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 4 BNs -//Process group with 5 BNs -//Process group with 6 BNs -//Process group with 7 BNs -M = (3*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1932+i] = 
_mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1992+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2052+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2112+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2172+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - 
ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2232+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1992+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2112+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2232+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2292+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 8 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; 
- min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2352+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2376+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, 
ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2400+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2424+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2448+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2472+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2520+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2496+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2352+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2376+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2400+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2424+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2448+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2464+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2496+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2520+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 9 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min 
= _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2544+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2568+i] = _mm256_sign_epi8(min, sgn); - } - for 
(int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2592+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2616+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2640+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2664+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2688+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - 
sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2736+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2712+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2544+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2568+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2592+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2616+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2640+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2664+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2688+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2712+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, 
maxLLR); - ((__m256i*)cnProcBufRes)[2736+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 10 BNs - M = (1*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2760+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2772+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2784+i] = _mm256_sign_epi8(min, sgn); - } - for 
(int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2796+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2808+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2820+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2832+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2844+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2868+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2856+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2760+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2772+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2784+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2796+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2808+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2820+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2832+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2844+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2856+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2868+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 19 BNs - M = (4*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2880+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2928+i] = _mm256_sign_epi8(min, sgn); - } - for (int 
i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2976+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3024+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3072+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3120+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3168+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3216+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3264+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3312+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3360+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3408+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3456+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3504+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - 
((__m256i*)cnProcBufRes)[3552+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3600+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3648+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3696+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3744+i] = _mm256_sign_epi8(min, sgn); - } -} diff --git 
a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG1_R89_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG1_R89_AVX2.c deleted file mode 100644 index dc7a9a92a41b4f6191ac18150a0f63b5a563431b..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG1_R89_AVX2.c +++ /dev/null @@ -1,1177 +0,0 @@ -#include <stdint.h> -#include <immintrin.h> -static inline void nrLDPC_cnProc_BG1_R89_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) { -//Process group with 3 BNs - __m256i ymm0, min, sgn,ones,maxLLR; - ones = _mm256_set1_epi8((char)1); - maxLLR = _mm256_set1_epi8((char)127); - uint32_t M; - int8_t* cnProcBuf = p_procBuf->cnProcBuf; - int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; - M = (1*Z + 31)>>5; - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[12+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[24+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[0+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[13+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[25+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[0+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[24+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[12+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[1+i]; - sgn = 
_mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[25+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[13+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[0+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[12+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[24+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[1+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[13+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[25+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 4 BNs -//Process group with 5 BNs -//Process group with 6 BNs -//Process group with 7 BNs -//Process group with 8 BNs -//Process group with 9 BNs -//Process group with 10 BNs -//Process group with 19 BNs - M = (4*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2880+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2928+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2976+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3024+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3072+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - 
((__m256i*)cnProcBufRes)[3120+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3168+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3216+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3264+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3312+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = 
_mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3360+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3408+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3456+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3504+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3552+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3600+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3648+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3744+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3696+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2880+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2928+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2976+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3024+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3072+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3120+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3168+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3216+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3264+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3312+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3360+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3408+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3504+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3552+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3600+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3648+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[3696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[3744+i] = _mm256_sign_epi8(min, sgn); - } -} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R13_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R13_AVX2.c deleted file mode 100644 index 50c3bc0cb2ce009815a69640194aa833d8838d06..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R13_AVX2.c +++ /dev/null @@ -1,777 +0,0 @@ -#include <stdint.h> -#include <immintrin.h> -static inline void nrLDPC_cnProc_BG2_R13_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) { -//Process group with 3 BNs - __m256i ymm0, min, sgn,ones,maxLLR; - ones = _mm256_set1_epi8((char)1); - maxLLR = _mm256_set1_epi8((char)127); - uint32_t M; - int8_t* cnProcBuf = p_procBuf->cnProcBuf; - int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; -//Process group with 4 BNs - M = (8*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[216+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - 
((__m256i*)cnProcBufRes)[456+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[696+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[936+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 5 BNs - M = (7*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1284+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1392+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1500+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1608+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1176+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1176+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1392+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1500+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1608+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1284+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1176+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1284+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1500+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1608+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1392+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1176+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1284+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1392+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1608+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1500+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1176+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1284+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1392+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1500+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1608+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 6 BNs -M = (3*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1716+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1752+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = 
_mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1788+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1824+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1860+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1896+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 8 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1932+i] = 
_mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1956+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, 
maxLLR); - ((__m256i*)cnProcBufRes)[1980+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2004+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2028+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2052+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2076+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2100+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 10 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2124+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2148+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2172+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2196+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2220+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2244+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2268+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2292+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2316+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2340+i] = _mm256_sign_epi8(min, sgn); - } -} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R15_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R15_AVX2.c deleted file mode 100644 index 046fdb1f22f22e727b28ac4ce40feb30bed36296..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R15_AVX2.c +++ /dev/null @@ -1,832 +0,0 @@ -#include <stdint.h> -#include <immintrin.h> -static inline void nrLDPC_cnProc_BG2_R15_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) { -//Process group with 3 BNs - __m256i ymm0, min, sgn,ones,maxLLR; - ones = _mm256_set1_epi8((char)1); - maxLLR = _mm256_set1_epi8((char)127); - uint32_t M; - int8_t* cnProcBuf = p_procBuf->cnProcBuf; - int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; - M = (6*Z + 31)>>5; - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[72+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[144+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[0+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[73+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[145+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[0+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[144+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[72+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[1+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[145+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[73+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i+=2) { - ymm0 = ((__m256i*)cnProcBuf)[0+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[72+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[144+i] = _mm256_sign_epi8(min, sgn); - ymm0 = ((__m256i*)cnProcBuf)[1+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[73+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[145+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 4 BNs - M = (20*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[216+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[456+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[696+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[936+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 5 BNs - M = (9*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1284+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1392+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1500+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1608+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1176+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1176+i]; - sgn = _mm256_sign_epi8(ones, 
ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1392+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1500+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1608+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1284+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1176+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1284+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1500+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1608+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1392+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1176+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1284+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1392+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1608+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1500+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1176+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1284+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1392+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1500+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1608+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 6 BNs -M = (3*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1716+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1752+i] = 
_mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1788+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1824+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1860+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1896+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 8 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1932+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1956+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1980+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2004+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - 
sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2028+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2052+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = 
_mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2076+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2100+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 10 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2124+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2148+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - 
ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2172+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2196+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2220+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2244+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2268+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2292+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2316+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, 
_mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2340+i] = _mm256_sign_epi8(min, sgn); - } -} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R23_AVX2.c b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R23_AVX2.c deleted file mode 100644 index dc2b67e0853207768762c3824c0d0f1a91d9ca50..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/cnProc/nrLDPC_cnProc_BG2_R23_AVX2.c +++ /dev/null @@ -1,696 +0,0 @@ -#include <stdint.h> -#include <immintrin.h> -static inline void nrLDPC_cnProc_BG2_R23_AVX2(t_nrLDPC_lut* p_lut, t_nrLDPC_procBuf* p_procBuf, uint16_t Z) { -//Process group with 3 BNs - __m256i ymm0, min, sgn,ones,maxLLR; - ones = _mm256_set1_epi8((char)1); - maxLLR = _mm256_set1_epi8((char)127); - uint32_t M; - int8_t* cnProcBuf = p_procBuf->cnProcBuf; - int8_t* cnProcBufRes = p_procBuf->cnProcBufRes; -//Process group with 4 BNs - M = (1*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[216+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[456+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[936+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[696+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[216+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[456+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[696+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[936+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 5 BNs -//Process group with 6 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - 
((__m256i*)cnProcBufRes)[1716+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1752+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1788+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1824+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1896+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1860+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1716+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1752+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1788+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1824+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1860+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1896+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 8 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - 
sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1932+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1956+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) 
{ - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[1980+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2004+i] = 
_mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2028+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, 
maxLLR); - ((__m256i*)cnProcBufRes)[2052+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2100+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2076+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[1932+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1956+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[1980+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2004+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2028+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2052+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2076+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = 
_mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2100+i] = _mm256_sign_epi8(min, sgn); - } -//Process group with 10 BNs -M = (2*Z + 31)>>5; - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2124+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 
= ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2148+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2172+i] = 
_mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2196+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2220+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2244+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, 
ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2268+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = 
((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2292+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2340+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2316+i] = _mm256_sign_epi8(min, sgn); - } - for (int i=0;i<M;i++) { - ymm0 = ((__m256i*)cnProcBuf)[2124+i]; - sgn = _mm256_sign_epi8(ones, ymm0); - min = _mm256_abs_epi8(ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2148+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - 
sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2172+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2196+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2220+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2244+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2268+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2292+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - ymm0 = ((__m256i*)cnProcBuf)[2316+i]; - min = _mm256_min_epu8(min, _mm256_abs_epi8(ymm0)); - sgn = _mm256_sign_epi8(sgn, ymm0); - min = _mm256_min_epu8(min, maxLLR); - ((__m256i*)cnProcBufRes)[2340+i] = _mm256_sign_epi8(min, sgn); - } -} diff --git a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/llr2CnProcBuf/nrLDPC_llr2CnProcBuf_BG1_AVX2.h b/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/llr2CnProcBuf/nrLDPC_llr2CnProcBuf_BG1_AVX2.h deleted file mode 100644 index af42499dc15cfeb6fca061f466a8fabe09de5668..0000000000000000000000000000000000000000 --- a/openair1/PHY/CODING/nrLDPC_decoder/nrLDPC_tools/ldpc_gen_files/llr2CnProcBuf/nrLDPC_llr2CnProcBuf_BG1_AVX2.h +++ /dev/null @@ -1,1040 +0,0 @@ -#include <omp.h> -#pragma omp declare simd notinbranch -static inline void nrLDPC_llr2CnProcBuf_BG1_AVX2(t_nrLDPC_lut* p_lut, int8_t* llr, int8_t* cnProcBuf, uint16_t Z) { - const uint16_t (*lut_circShift_CNG3) [lut_numCnInCnGroups_BG1_R13[0]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[0]]) p_lut->circShift[0]; - const uint16_t (*lut_circShift_CNG4) [lut_numCnInCnGroups_BG1_R13[1]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[1]]) 
p_lut->circShift[1]; - const uint16_t (*lut_circShift_CNG5) [lut_numCnInCnGroups_BG1_R13[2]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[2]]) p_lut->circShift[2]; - const uint16_t (*lut_circShift_CNG6) [lut_numCnInCnGroups_BG1_R13[3]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[3]]) p_lut->circShift[3]; - const uint16_t (*lut_circShift_CNG7) [lut_numCnInCnGroups_BG1_R13[4]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[4]]) p_lut->circShift[4]; - const uint16_t (*lut_circShift_CNG8) [lut_numCnInCnGroups_BG1_R13[5]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[5]]) p_lut->circShift[5]; - const uint16_t (*lut_circShift_CNG9) [lut_numCnInCnGroups_BG1_R13[6]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[6]]) p_lut->circShift[6]; - const uint16_t (*lut_circShift_CNG10)[lut_numCnInCnGroups_BG1_R13[7]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[7]]) p_lut->circShift[7]; - const uint16_t (*lut_circShift_CNG19)[lut_numCnInCnGroups_BG1_R13[8]] = (uint16_t(*)[lut_numCnInCnGroups_BG1_R13[8]]) p_lut->circShift[8]; - const uint8_t (*lut_posBnInCnProcBuf_CNG3) [lut_numCnInCnGroups_BG1_R13[0]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[0]]) p_lut->posBnInCnProcBuf[0]; - const uint8_t (*lut_posBnInCnProcBuf_CNG4) [lut_numCnInCnGroups_BG1_R13[1]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[1]]) p_lut->posBnInCnProcBuf[1]; - const uint8_t (*lut_posBnInCnProcBuf_CNG5) [lut_numCnInCnGroups_BG1_R13[2]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[2]]) p_lut->posBnInCnProcBuf[2]; - const uint8_t (*lut_posBnInCnProcBuf_CNG6) [lut_numCnInCnGroups_BG1_R13[3]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[3]]) p_lut->posBnInCnProcBuf[3]; - const uint8_t (*lut_posBnInCnProcBuf_CNG7) [lut_numCnInCnGroups_BG1_R13[4]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[4]]) p_lut->posBnInCnProcBuf[4]; - const uint8_t (*lut_posBnInCnProcBuf_CNG8) [lut_numCnInCnGroups_BG1_R13[5]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[5]]) p_lut->posBnInCnProcBuf[5]; - const uint8_t (*lut_posBnInCnProcBuf_CNG9) 
[lut_numCnInCnGroups_BG1_R13[6]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[6]]) p_lut->posBnInCnProcBuf[6]; - const uint8_t (*lut_posBnInCnProcBuf_CNG10)[lut_numCnInCnGroups_BG1_R13[7]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[7]]) p_lut->posBnInCnProcBuf[7]; - const uint8_t (*lut_posBnInCnProcBuf_CNG19)[lut_numCnInCnGroups_BG1_R13[8]] = (uint8_t(*)[lut_numCnInCnGroups_BG1_R13[8]]) p_lut->posBnInCnProcBuf[8]; - int8_t* p_cnProcBuf; - uint32_t idxBn = 0; - p_cnProcBuf= &cnProcBuf[0]; - idxBn = lut_posBnInCnProcBuf_CNG3[0][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[0][0]); - p_cnProcBuf= &cnProcBuf[384]; - idxBn = lut_posBnInCnProcBuf_CNG3[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[1][0]); - p_cnProcBuf= &cnProcBuf[768]; - idxBn = lut_posBnInCnProcBuf_CNG3[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG3[2][0]); - p_cnProcBuf= &cnProcBuf[1152]; - idxBn = lut_posBnInCnProcBuf_CNG4[0][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[0][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[0][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[0][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[0][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[0][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[3072]; - idxBn = lut_posBnInCnProcBuf_CNG4[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[1][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][1]); - p_cnProcBuf += Z; - idxBn = 
lut_posBnInCnProcBuf_CNG4[1][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[1][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[1][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[1][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[4992]; - idxBn = lut_posBnInCnProcBuf_CNG4[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[2][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[2][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[2][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[2][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[2][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[6912]; - idxBn = lut_posBnInCnProcBuf_CNG4[3][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[3][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[3][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[3][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG4[3][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG4[3][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[8832]; - idxBn = lut_posBnInCnProcBuf_CNG5[0][0]*Z; - 
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][7]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][8]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][8]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][9]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][9]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][10]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][10]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][11]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][11]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][12]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][12]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][13]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][13]); - p_cnProcBuf += Z; - idxBn = 
lut_posBnInCnProcBuf_CNG5[0][14]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][14]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][15]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][15]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][16]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][16]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[0][17]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[0][17]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[15744]; - idxBn = lut_posBnInCnProcBuf_CNG5[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][7]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][8]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][8]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][9]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, 
lut_circShift_CNG5[1][9]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][10]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][10]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][11]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][11]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][12]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][12]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][13]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][13]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][14]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][14]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][15]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][15]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][16]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][16]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[1][17]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[1][17]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[22656]; - idxBn = lut_posBnInCnProcBuf_CNG5[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][4]); - p_cnProcBuf += Z; - idxBn = 
lut_posBnInCnProcBuf_CNG5[2][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][7]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][8]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][8]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][9]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][9]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][10]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][10]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][11]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][11]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][12]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][12]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][13]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][13]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][14]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][14]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][15]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][15]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][16]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][16]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[2][17]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[2][17]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[29568]; - idxBn = lut_posBnInCnProcBuf_CNG5[3][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, 
lut_circShift_CNG5[3][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][7]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][8]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][8]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][9]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][9]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][10]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][10]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][11]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][11]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][12]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][12]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][13]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][13]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][14]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, 
&llr[idxBn], Z, lut_circShift_CNG5[3][14]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][15]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][15]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][16]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][16]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[3][17]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[3][17]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[36480]; - idxBn = lut_posBnInCnProcBuf_CNG5[4][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][7]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][8]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][8]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][9]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][9]); - p_cnProcBuf += Z; - idxBn = 
lut_posBnInCnProcBuf_CNG5[4][10]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][10]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][11]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][11]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][12]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][12]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][13]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][13]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][14]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][14]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][15]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][15]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][16]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][16]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG5[4][17]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG5[4][17]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[43392]; - idxBn = lut_posBnInCnProcBuf_CNG6[0][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[0][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[0][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[0][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[0][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[0][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, 
lut_circShift_CNG6[0][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[0][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[0][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[0][7]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[46464]; - idxBn = lut_posBnInCnProcBuf_CNG6[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[1][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[1][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[1][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[1][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[1][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[1][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[1][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[1][7]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[49536]; - idxBn = lut_posBnInCnProcBuf_CNG6[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[2][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[2][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][2]); - p_cnProcBuf += Z; - idxBn = 
lut_posBnInCnProcBuf_CNG6[2][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[2][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[2][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[2][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[2][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[2][7]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[52608]; - idxBn = lut_posBnInCnProcBuf_CNG6[3][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[3][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[3][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[3][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[3][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[3][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[3][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[3][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[3][7]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[55680]; - idxBn = lut_posBnInCnProcBuf_CNG6[4][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, 
&llr[idxBn], Z, lut_circShift_CNG6[4][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[4][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[4][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[4][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[4][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[4][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][5]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[4][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[4][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[4][7]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[58752]; - idxBn = lut_posBnInCnProcBuf_CNG6[5][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[5][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[5][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[5][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[5][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][4]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[5][5]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][5]); - p_cnProcBuf += Z; - idxBn = 
lut_posBnInCnProcBuf_CNG6[5][6]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][6]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG6[5][7]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG6[5][7]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[61824]; - idxBn = lut_posBnInCnProcBuf_CNG7[0][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[0][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[0][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[0][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[0][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[0][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[63744]; - idxBn = lut_posBnInCnProcBuf_CNG7[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[1][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[1][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[1][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[1][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[1][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[65664]; - idxBn = lut_posBnInCnProcBuf_CNG7[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[2][1]*Z; - 
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[2][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[2][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[2][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[2][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[67584]; - idxBn = lut_posBnInCnProcBuf_CNG7[3][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[3][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[3][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[3][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[3][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[3][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[69504]; - idxBn = lut_posBnInCnProcBuf_CNG7[4][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[4][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[4][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[4][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[4][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[4][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, 
lut_circShift_CNG7[4][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[71424]; - idxBn = lut_posBnInCnProcBuf_CNG7[5][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[5][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[5][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[5][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[5][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[5][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[73344]; - idxBn = lut_posBnInCnProcBuf_CNG7[6][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[6][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[6][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[6][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][3]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG7[6][4]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG7[6][4]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[75264]; - idxBn = lut_posBnInCnProcBuf_CNG8[0][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[0][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG8[0][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[0][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[76032]; - idxBn = lut_posBnInCnProcBuf_CNG8[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, 
lut_circShift_CNG8[1][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG8[1][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[1][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[76800]; - idxBn = lut_posBnInCnProcBuf_CNG8[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[2][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG8[2][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[2][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[77568]; - idxBn = lut_posBnInCnProcBuf_CNG8[3][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[3][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG8[3][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[3][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[78336]; - idxBn = lut_posBnInCnProcBuf_CNG8[4][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[4][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG8[4][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[4][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[79104]; - idxBn = lut_posBnInCnProcBuf_CNG8[5][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[5][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG8[5][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[5][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[79872]; - idxBn = lut_posBnInCnProcBuf_CNG8[6][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[6][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG8[6][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[6][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[80640]; - idxBn = lut_posBnInCnProcBuf_CNG8[7][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[7][0]); - p_cnProcBuf += Z; - idxBn = 
lut_posBnInCnProcBuf_CNG8[7][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG8[7][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[81408]; - idxBn = lut_posBnInCnProcBuf_CNG9[0][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[0][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[0][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[0][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[82176]; - idxBn = lut_posBnInCnProcBuf_CNG9[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[1][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[1][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[1][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[82944]; - idxBn = lut_posBnInCnProcBuf_CNG9[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[2][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[2][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[2][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[83712]; - idxBn = lut_posBnInCnProcBuf_CNG9[3][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[3][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[3][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[3][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[84480]; - idxBn = lut_posBnInCnProcBuf_CNG9[4][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[4][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[4][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[4][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[85248]; - idxBn = lut_posBnInCnProcBuf_CNG9[5][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[5][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[5][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, 
&llr[idxBn], Z, lut_circShift_CNG9[5][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[86016]; - idxBn = lut_posBnInCnProcBuf_CNG9[6][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[6][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[6][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[6][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[86784]; - idxBn = lut_posBnInCnProcBuf_CNG9[7][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[7][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[7][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[7][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[87552]; - idxBn = lut_posBnInCnProcBuf_CNG9[8][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[8][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG9[8][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG9[8][1]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[88320]; - idxBn = lut_posBnInCnProcBuf_CNG10[0][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[0][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[88704]; - idxBn = lut_posBnInCnProcBuf_CNG10[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[1][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[89088]; - idxBn = lut_posBnInCnProcBuf_CNG10[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[2][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[89472]; - idxBn = lut_posBnInCnProcBuf_CNG10[3][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[3][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[89856]; - idxBn = lut_posBnInCnProcBuf_CNG10[4][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[4][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[90240]; - idxBn = lut_posBnInCnProcBuf_CNG10[5][0]*Z; - 
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[5][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[90624]; - idxBn = lut_posBnInCnProcBuf_CNG10[6][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[6][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[91008]; - idxBn = lut_posBnInCnProcBuf_CNG10[7][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[7][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[91392]; - idxBn = lut_posBnInCnProcBuf_CNG10[8][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[8][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[91776]; - idxBn = lut_posBnInCnProcBuf_CNG10[9][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG10[9][0]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[92160]; - idxBn = lut_posBnInCnProcBuf_CNG19[0][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[0][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[0][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[0][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[0][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[0][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[0][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[0][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[93696]; - idxBn = lut_posBnInCnProcBuf_CNG19[1][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[1][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[1][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[1][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[1][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[1][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[1][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, 
lut_circShift_CNG19[1][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[95232]; - idxBn = lut_posBnInCnProcBuf_CNG19[2][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[2][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[2][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[2][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[2][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[2][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[2][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[2][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[96768]; - idxBn = lut_posBnInCnProcBuf_CNG19[3][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[3][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[3][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[3][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[3][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[3][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[3][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[3][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[98304]; - idxBn = lut_posBnInCnProcBuf_CNG19[4][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[4][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[4][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[4][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[4][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[4][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[4][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[4][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[99840]; - idxBn = lut_posBnInCnProcBuf_CNG19[5][0]*Z; - 
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[5][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[5][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[5][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[5][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[5][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[5][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[5][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[101376]; - idxBn = lut_posBnInCnProcBuf_CNG19[6][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[6][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[6][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[6][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[6][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[6][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[6][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[6][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[102912]; - idxBn = lut_posBnInCnProcBuf_CNG19[7][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[7][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[7][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[7][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[7][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[7][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[7][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[7][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[104448]; - idxBn = lut_posBnInCnProcBuf_CNG19[8][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[8][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[8][1]*Z; - 
nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[8][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[8][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[8][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[8][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[8][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[105984]; - idxBn = lut_posBnInCnProcBuf_CNG19[9][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[9][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[9][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[9][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[9][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[9][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[9][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[9][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[107520]; - idxBn = lut_posBnInCnProcBuf_CNG19[10][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[10][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[10][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[10][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[10][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[10][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[10][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[10][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[109056]; - idxBn = lut_posBnInCnProcBuf_CNG19[11][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[11][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[11][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[11][1]); - p_cnProcBuf += Z; - idxBn = 
lut_posBnInCnProcBuf_CNG19[11][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[11][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[11][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[11][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[110592]; - idxBn = lut_posBnInCnProcBuf_CNG19[12][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[12][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[12][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[12][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[12][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[12][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[12][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[12][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[112128]; - idxBn = lut_posBnInCnProcBuf_CNG19[13][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[13][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[13][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[13][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[13][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[13][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[13][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[13][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[113664]; - idxBn = lut_posBnInCnProcBuf_CNG19[14][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[14][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[14][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[14][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[14][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[14][2]); - p_cnProcBuf 
+= Z; - idxBn = lut_posBnInCnProcBuf_CNG19[14][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[14][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[115200]; - idxBn = lut_posBnInCnProcBuf_CNG19[15][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[15][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[15][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[15][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[15][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[15][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[15][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[15][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[116736]; - idxBn = lut_posBnInCnProcBuf_CNG19[16][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[16][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[16][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[16][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[16][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[16][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[16][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[16][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[118272]; - idxBn = lut_posBnInCnProcBuf_CNG19[17][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[17][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[17][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[17][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[17][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[17][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[17][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, 
lut_circShift_CNG19[17][3]); - p_cnProcBuf += Z; - p_cnProcBuf= &cnProcBuf[119808]; - idxBn = lut_posBnInCnProcBuf_CNG19[18][0]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[18][0]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[18][1]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[18][1]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[18][2]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[18][2]); - p_cnProcBuf += Z; - idxBn = lut_posBnInCnProcBuf_CNG19[18][3]*Z; - nrLDPC_circ_memcpy1(p_cnProcBuf, &llr[idxBn], Z, lut_circShift_CNG19[18][3]); - p_cnProcBuf += Z; -}