Commit 8d4405bd authored by knopp's avatar knopp

added avx2 optimized turbo decoder for 16-bit LLR. This decoder parallelizes...

Added an AVX2-optimized turbo decoder for 16-bit LLR. This decoder parallelizes by decoding 2 code segments concurrently. It requires updates to dlsch_decoding.c to identify when the new parallel version can be used. Other minor changes are related to memory allocations for future AVX2 optimizations (32-byte alignment).
parent 27b1707e
...@@ -134,7 +134,7 @@ else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l") ...@@ -134,7 +134,7 @@ else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2") set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2")
endif() endif()
if (CPUINFO MATCHES "sse4_2") if (CPUINFO MATCHES "sse4_2")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.2") set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -mavx2 -msse4.2")
endif() endif()
if (CPUINFO MATCHES "sse4_1") if (CPUINFO MATCHES "sse4_1")
set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1") set(C_FLAGS_PROCESSOR "${C_FLAGS_PROCESSOR} -msse4.1")
...@@ -168,7 +168,7 @@ set(CMAKE_CXX_FLAGS ...@@ -168,7 +168,7 @@ set(CMAKE_CXX_FLAGS
# these changes are related to hardcoded path to include .h files # these changes are related to hardcoded path to include .h files
add_definitions(-DCMAKER) add_definitions(-DCMAKER)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O2") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O3")
# Below has been put in comment because does not work with # Below has been put in comment because does not work with
# SVN authentication. # SVN authentication.
...@@ -840,6 +840,7 @@ set(PHY_SRC ...@@ -840,6 +840,7 @@ set(PHY_SRC
${OPENAIR1_DIR}/PHY/CODING/crc_byte.c ${OPENAIR1_DIR}/PHY/CODING/crc_byte.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c
${OPENAIR1_DIR}/PHY/CODING/lte_rate_matching.c ${OPENAIR1_DIR}/PHY/CODING/lte_rate_matching.c
${OPENAIR1_DIR}/PHY/CODING/rate_matching.c ${OPENAIR1_DIR}/PHY/CODING/rate_matching.c
${OPENAIR1_DIR}/PHY/CODING/viterbi.c ${OPENAIR1_DIR}/PHY/CODING/viterbi.c
......
This diff is collapsed.
...@@ -483,6 +483,24 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -483,6 +483,24 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
time_stats_t *intl1_stats, time_stats_t *intl1_stats,
time_stats_t *intl2_stats); time_stats_t *intl2_stats);
/*!
\brief AVX2 variant of the 16-bit LLR turbo decoder. Per the commit description it
decodes two code segments concurrently, which is why it takes two input pointers
(y, y2) and two output pointers (decoded_bytes, decoded_bytes2).
NOTE(review): the parameter descriptions below are inferred from the parameter names
only; the implementation is not visible here -- confirm against
3gpplte_turbo_decoder_avx2_16bit.c before relying on them.
@param y               presumably the 16-bit LLR input for the first code segment
@param y2              presumably the 16-bit LLR input for the second code segment
@param decoded_bytes   presumably the output buffer for the first segment
@param decoded_bytes2  presumably the output buffer for the second segment
@param n               presumably the code block length (shared by both segments) -- TODO confirm
@param interleaver_f1  presumably the f1 parameter of the turbo-code interleaver
@param interleaver_f2  presumably the f2 parameter of the turbo-code interleaver
@param max_iterations  presumably the upper bound on decoder iterations
@param crc_type        presumably selects the CRC used to check the decoded output
@param F               presumably the number of filler bits -- TODO confirm
@param init_stats      timing-statistics handle (time_stats_t); exact stage measured not visible here
@param alpha_stats     timing-statistics handle (time_stats_t)
@param beta_stats      timing-statistics handle (time_stats_t)
@param gamma_stats     timing-statistics handle (time_stats_t)
@param ext_stats       timing-statistics handle (time_stats_t)
@param intl1_stats     timing-statistics handle (time_stats_t)
@param intl2_stats     timing-statistics handle (time_stats_t)
@returns a uint8_t status; presumably the number of iterations used -- TODO confirm
*/
uint8_t phy_threegpplte_turbo_decoder16avx2(int16_t *y,
int16_t *y2,
uint8_t *decoded_bytes,
uint8_t *decoded_bytes2,
uint16_t n,
uint16_t interleaver_f1,
uint16_t interleaver_f2,
uint8_t max_iterations,
uint8_t crc_type,
uint8_t F,
time_stats_t *init_stats,
time_stats_t *alpha_stats,
time_stats_t *beta_stats,
time_stats_t *gamma_stats,
time_stats_t *ext_stats,
time_stats_t *intl1_stats,
time_stats_t *intl2_stats);
/*! /*!
\brief This routine performs max-logmap detection for the 3GPP turbo code (with termination). It is optimized for SIMD processing and 8-bit \brief This routine performs max-logmap detection for the 3GPP turbo code (with termination). It is optimized for SIMD processing and 8-bit
LLR arithmetic, and requires SSE2,SSSE3 and SSE4.1 (gcc >=4.3 and appropriate CPU) LLR arithmetic, and requires SSE2,SSSE3 and SSE4.1 (gcc >=4.3 and appropriate CPU)
......
...@@ -895,7 +895,9 @@ void phy_init_lte_top(LTE_DL_FRAME_PARMS *lte_frame_parms) ...@@ -895,7 +895,9 @@ void phy_init_lte_top(LTE_DL_FRAME_PARMS *lte_frame_parms)
init_td8(); init_td8();
init_td16(); init_td16();
#ifdef __AVX2__
init_td16avx2();
#endif
lte_sync_time_init(lte_frame_parms); lte_sync_time_init(lte_frame_parms);
......
...@@ -26,187 +26,187 @@ ...@@ -26,187 +26,187 @@
Address : Eurecom, Campus SophiaTech, 450 Route des Chappes, CS 50193 - 06904 Biot Sophia Antipolis cedex, FRANCE Address : Eurecom, Campus SophiaTech, 450 Route des Chappes, CS 50193 - 06904 Biot Sophia Antipolis cedex, FRANCE
*******************************************************************************/ *******************************************************************************/
short filt24_0[24] __attribute__((aligned(16))) ={ short filt24_0[24] __attribute__((aligned(32))) ={
2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0,0 2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_0_dcl[24] __attribute__((aligned(16))) ={ short filt24_0_dcl[24] __attribute__((aligned(32))) ={
2341,4681,7022,9362,11703,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0,0 2341,4681,7022,9362,11703,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_0_dcr[24] __attribute__((aligned(16))) ={ short filt24_0_dcr[24] __attribute__((aligned(32))) ={
2730,5461,8192,10922,13653,16384,14043,11703,9362,7022,4681,0,0,0,0,0,0,0,0,0,0,0,0 2730,5461,8192,10922,13653,16384,14043,11703,9362,7022,4681,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_1[24] __attribute__((aligned(16))) ={ short filt24_1[24] __attribute__((aligned(32))) ={
0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0 0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_1_dcl[24] __attribute__((aligned(16))) ={ short filt24_1_dcl[24] __attribute__((aligned(32))) ={
0,4681,7022,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0 0,4681,7022,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_1_dcr[24] __attribute__((aligned(16))) ={ short filt24_1_dcr[24] __attribute__((aligned(32))) ={
0,2730,5461,8192,10922,13653,16384,14043,11703,9362,7022,4681,0,0,0,0,0,0,0,0,0,0,0,0 0,2730,5461,8192,10922,13653,16384,14043,11703,9362,7022,4681,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_2[24] __attribute__((aligned(16))) ={ short filt24_2[24] __attribute__((aligned(32))) ={
0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0 0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_2_dcl[24] __attribute__((aligned(16))) ={ short filt24_2_dcl[24] __attribute__((aligned(32))) ={
0,0,2341,4681,7022,9362, 11703,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0 0,0,2341,4681,7022,9362, 11703,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_2_dcr[24] __attribute__((aligned(16))) ={ short filt24_2_dcr[24] __attribute__((aligned(32))) ={
0,0,2730,5461,8192,10922,13653,16384,14043,11703,9362,4681,2341,0,0,0,0,0,0,0,0,0,0,0 0,0,2730,5461,8192,10922,13653,16384,14043,11703,9362,4681,2341,0,0,0,0,0,0,0,0,0,0,0
}; };
// X X X Y | X X X X | X Y X X // X X X Y | X X X X | X Y X X
short filt24_3[24] __attribute__((aligned(16))) ={ short filt24_3[24] __attribute__((aligned(32))) ={
0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0 0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_3_dcl[24] __attribute__((aligned(16))) ={ short filt24_3_dcl[24] __attribute__((aligned(32))) ={
0,0,0,2341,4681,7022,9362,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0 0,0,0,2341,4681,7022,9362,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0
}; };
// X X X Y | X X DC X X | X Y X X // X X X Y | X X DC X X | X Y X X
short filt24_3_dcr[24] __attribute__((aligned(16))) ={ short filt24_3_dcr[24] __attribute__((aligned(32))) ={
0,0,0,2730,5461,8192,10922,13653,16384,14043,11703,7022,4681,2341,0,0,0,0,0,0,0,0,0,0 0,0,0,2730,5461,8192,10922,13653,16384,14043,11703,7022,4681,2341,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_4[24] __attribute__((aligned(16))) ={ short filt24_4[24] __attribute__((aligned(32))) ={
0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0 0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0
}; };
short filt24_4_dcl[24] __attribute__((aligned(16))) ={ short filt24_4_dcl[24] __attribute__((aligned(32))) ={
0,0,0,0,2341,7022,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0 0,0,0,0,2341,7022,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0
}; };
short filt24_4_dcr[24] __attribute__((aligned(16))) ={ short filt24_4_dcr[24] __attribute__((aligned(32))) ={
0,0,0,0,2730,5461,8192,10922,13653,16384,14043,11703,7022,4681,2341,0,0,0,0,0,0,0,0,0 0,0,0,0,2730,5461,8192,10922,13653,16384,14043,11703,7022,4681,2341,0,0,0,0,0,0,0,0,0
}; };
short filt24_5[24] __attribute__((aligned(16))) ={ short filt24_5[24] __attribute__((aligned(32))) ={
0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0 0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0
}; };
// X X X Y | X X DC X X | X Y X X // X X X Y | X X DC X X | X Y X X
short filt24_5_dcl[24] __attribute__((aligned(16))) ={ short filt24_5_dcl[24] __attribute__((aligned(32))) ={
0,0,0,0,0,2341,4681,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0 0,0,0,0,0,2341,4681,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0
}; };
short filt24_5_dcr[24] __attribute__((aligned(16))) ={ short filt24_5_dcr[24] __attribute__((aligned(32))) ={
0,0,0,0,0,2730,5461,8192,10922,13653,16384,11703,9362,7022,4681,2730,0,0,0,0,0,0,0,0 0,0,0,0,0,2730,5461,8192,10922,13653,16384,11703,9362,7022,4681,2730,0,0,0,0,0,0,0,0
}; };
short filt24_6[24] __attribute__((aligned(16))) ={ short filt24_6[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0 0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0
}; };
short filt24_6_dcl[24] __attribute__((aligned(16))) ={ short filt24_6_dcl[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,4681,7022,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0 0,0,0,0,0,0,4681,7022,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0
}; };
short filt24_6_dcr[24] __attribute__((aligned(16))) ={ short filt24_6_dcr[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,14043,11703,9362,7022,4681,0,0,0,0,0,0,0 0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,14043,11703,9362,7022,4681,0,0,0,0,0,0,0
}; };
short filt24_7[24] __attribute__((aligned(16))) ={ short filt24_7[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0 0,0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0
}; };
short filt24_7_dcl[24] __attribute__((aligned(16))) ={ short filt24_7_dcl[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,0,4681,7022,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0 0,0,0,0,0,0,0,4681,7022,9362,11703,14043,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0
}; };
short filt24_7_dcr[24] __attribute__((aligned(16))) ={ short filt24_7_dcr[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,14043,11703,9362,7022,4681,0,0,0,0,0,0 0,0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,14043,11703,9362,7022,4681,0,0,0,0,0,0
}; };
short filt24_0l[24] __attribute__((aligned(16))) ={ short filt24_0l[24] __attribute__((aligned(32))) ={
30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0,0 30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_1l[24] __attribute__((aligned(16))) ={ short filt24_1l[24] __attribute__((aligned(32))) ={
0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0 0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_2l[24] __attribute__((aligned(16))) ={ short filt24_2l[24] __attribute__((aligned(32))) ={
0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0 0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_3l[24] __attribute__((aligned(16))) ={ short filt24_3l[24] __attribute__((aligned(32))) ={
//0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0}; //0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0};
0,0,0,0,0,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0 0,0,0,0,0,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_4l[24] __attribute__((aligned(16))) ={ short filt24_4l[24] __attribute__((aligned(32))) ={
0,0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0 0,0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0
}; };
short filt24_5l[24] __attribute__((aligned(16))) ={ short filt24_5l[24] __attribute__((aligned(32))) ={
0,0,0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0 0,0,0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0
}; };
short filt24_6l[24] __attribute__((aligned(16))) ={ short filt24_6l[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0 0,0,0,0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0
}; };
short filt24_7l[24] __attribute__((aligned(16))) ={ short filt24_7l[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0 0,0,0,0,0,0,0,30037,27306,24576,21845,19114,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0
}; };
short filt24_0l2[24] __attribute__((aligned(16))) ={ short filt24_0l2[24] __attribute__((aligned(32))) ={
2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0,0 2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_1l2[24] __attribute__((aligned(16))) ={ short filt24_1l2[24] __attribute__((aligned(32))) ={
0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0 0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_2l2[24] __attribute__((aligned(16))) ={ short filt24_2l2[24] __attribute__((aligned(32))) ={
-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0 -2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_3l2[24] __attribute__((aligned(16))) ={ short filt24_3l2[24] __attribute__((aligned(32))) ={
-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0 -5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_4l2[24] __attribute__((aligned(16))) ={ short filt24_4l2[24] __attribute__((aligned(32))) ={
-8192,-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0 -8192,-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0,0
}; };
short filt24_5l2[24] __attribute__((aligned(16))) ={ short filt24_5l2[24] __attribute__((aligned(32))) ={
0,-8192,-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0 0,-8192,-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0,0
}; };
short filt24_6l2[24] __attribute__((aligned(16))) ={ short filt24_6l2[24] __attribute__((aligned(32))) ={
-13653,-10922,-8192,-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0 -13653,-10922,-8192,-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0,0
}; };
short filt24_7l2[24] __attribute__((aligned(16))) ={ short filt24_7l2[24] __attribute__((aligned(32))) ={
0,-13653,-10922,-8192,-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0 0,-13653,-10922,-8192,-5461,-2730,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,0,0,0,0,0
}; };
short filt24_0r[24] __attribute__((aligned(16))) ={ short filt24_0r[24] __attribute__((aligned(32))) ={
2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0,0,0,0,0 2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_1r[24] __attribute__((aligned(16))) ={ short filt24_1r[24] __attribute__((aligned(32))) ={
0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0,0,0,0 0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_2r[24] __attribute__((aligned(16))) ={ short filt24_2r[24] __attribute__((aligned(32))) ={
0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0,0,0 0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_3r[24] __attribute__((aligned(16))) ={ short filt24_3r[24] __attribute__((aligned(32))) ={
0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0,0 0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0,0
}; };
short filt24_4r[24] __attribute__((aligned(16))) ={ short filt24_4r[24] __attribute__((aligned(32))) ={
0,0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0 0,0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0,0
}; };
short filt24_5r[24] __attribute__((aligned(16))) ={ short filt24_5r[24] __attribute__((aligned(32))) ={
0,0,0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0 0,0,0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0,0
}; };
short filt24_6r[24] __attribute__((aligned(16))) ={ short filt24_6r[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0 0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0,0
}; };
short filt24_7r[24] __attribute__((aligned(16))) ={ short filt24_7r[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0 0,0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,19114,21845,24576,27306,30037,0,0,0,0,0,0
}; };
short filt24_0r2[24] __attribute__((aligned(16))) ={ /****/ short filt24_0r2[24] __attribute__((aligned(32))) ={ /****/
2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0,0,0,0,0 2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0,0,0,0,0
}; };
short filt24_1r2[24] __attribute__((aligned(16))) ={ short filt24_1r2[24] __attribute__((aligned(32))) ={
0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0,0,0,0 0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0,0,0,0
}; };
short filt24_2r2[24] __attribute__((aligned(16))) ={ short filt24_2r2[24] __attribute__((aligned(32))) ={
0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0,0,0 0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0,0,0
}; };
short filt24_3r2[24] __attribute__((aligned(16))) ={ short filt24_3r2[24] __attribute__((aligned(32))) ={
0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0,0 0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0,0
}; };
short filt24_4r2[24] __attribute__((aligned(16))) ={ short filt24_4r2[24] __attribute__((aligned(32))) ={
0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0 0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0,0
}; };
short filt24_5r2[24] __attribute__((aligned(16))) ={ short filt24_5r2[24] __attribute__((aligned(32))) ={
0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0 0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0,0
}; };
short filt24_6r2[24] __attribute__((aligned(16))) ={ short filt24_6r2[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0 0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653,0
}; };
short filt24_7r2[24] __attribute__((aligned(16))) ={ short filt24_7r2[24] __attribute__((aligned(32))) ={
0,0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653 0,0,0,0,0,0,0,2730,5461,8192,10922,13653,16384,13653,10922,8192,5461,2730,0,-2730,-5461,-8192,-10922,-13653
}; };
...@@ -52,8 +52,8 @@ ...@@ -52,8 +52,8 @@
int* sync_corr_ue0 = NULL; int* sync_corr_ue0 = NULL;
int* sync_corr_ue1 = NULL; int* sync_corr_ue1 = NULL;
int* sync_corr_ue2 = NULL; int* sync_corr_ue2 = NULL;
int sync_tmp[2048*4] __attribute__((aligned(16))); int sync_tmp[2048*4] __attribute__((aligned(32)));
short syncF_tmp[2048*2] __attribute__((aligned(16))); short syncF_tmp[2048*2] __attribute__((aligned(32)));
......
...@@ -56,8 +56,8 @@ void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq) ...@@ -56,8 +56,8 @@ void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq)
{ {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
UE_SCAN_INFO_t *scan_info = &ue->scan_info[band]; UE_SCAN_INFO_t *scan_info = &ue->scan_info[band];
int16_t spectrum[12288] __attribute__((aligned(16))); int16_t spectrum[12288] __attribute__((aligned(32)));
int16_t spectrum_p5ms[12288] __attribute__((aligned(16))); int16_t spectrum_p5ms[12288] __attribute__((aligned(32)));
int i,f,band_idx; int i,f,band_idx;
__m128i autocorr0[256/4],autocorr1[256/4],autocorr2[256/4]; __m128i autocorr0[256/4],autocorr1[256/4],autocorr2[256/4];
__m128i autocorr0_t[256/4],autocorr1_t[256/4],autocorr2_t[256/4]; __m128i autocorr0_t[256/4],autocorr1_t[256/4],autocorr2_t[256/4];
......
...@@ -186,6 +186,27 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -186,6 +186,27 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
uint8_t crc_type; uint8_t crc_type;
#ifdef DEBUG_DLSCH_DECODING #ifdef DEBUG_DLSCH_DECODING
uint16_t i; uint16_t i;
#endif
#ifdef __AVX2__
int Kr_last,skipped_last=0;
uint8_t (*tc_2cw)(int16_t *y,
int16_t *y2,
uint8_t *,
uint8_t *,
uint16_t,
uint16_t,
uint16_t,
uint8_t,
uint8_t,
uint8_t,
time_stats_t *,
time_stats_t *,
time_stats_t *,
time_stats_t *,
time_stats_t *,
time_stats_t *,
time_stats_t *);
#endif #endif
uint8_t (*tc)(int16_t *y, uint8_t (*tc)(int16_t *y,
uint8_t *, uint8_t *,
...@@ -203,6 +224,9 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -203,6 +224,9 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
time_stats_t *, time_stats_t *,
time_stats_t *); time_stats_t *);
if (!dlsch_llr) { if (!dlsch_llr) {
printf("dlsch_decoding.c: NULL dlsch_llr pointer\n"); printf("dlsch_decoding.c: NULL dlsch_llr pointer\n");
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
...@@ -223,8 +247,12 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -223,8 +247,12 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
if (llr8_flag == 0) if (llr8_flag == 0) {
#ifdef __AVX2__
tc_2cw = phy_threegpplte_turbo_decoder16avx2;
#endif
tc = phy_threegpplte_turbo_decoder16;