Commit b1e28edd authored by Florian Kaltenberger's avatar Florian Kaltenberger

Merge branch 'enhancement-43-AVX2' into bugfix-48-L1L2signaling

corrected some additional FFT related issues from AVX2 merge when running with real-time MODEM.

Conflicts:
	openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
parents 801e343c c0f6881c
...@@ -174,7 +174,7 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath -Wl,${OPENAIR_ ...@@ -174,7 +174,7 @@ set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath -Wl,${OPENAIR_
# these changes are related to hardcoded path to include .h files # these changes are related to hardcoded path to include .h files
add_definitions(-DCMAKER) add_definitions(-DCMAKER)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3") set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O2") set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O3")
set(GIT_BRANCH "UNKNOWN") set(GIT_BRANCH "UNKNOWN")
...@@ -949,6 +949,7 @@ set(PHY_SRC ...@@ -949,6 +949,7 @@ set(PHY_SRC
${OPENAIR1_DIR}/PHY/CODING/crc_byte.c ${OPENAIR1_DIR}/PHY/CODING/crc_byte.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c ${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c
${OPENAIR1_DIR}/PHY/CODING/3gpplte_turbo_decoder_avx2_16bit.c
${OPENAIR1_DIR}/PHY/CODING/lte_rate_matching.c ${OPENAIR1_DIR}/PHY/CODING/lte_rate_matching.c
${OPENAIR1_DIR}/PHY/CODING/rate_matching.c ${OPENAIR1_DIR}/PHY/CODING/rate_matching.c
${OPENAIR1_DIR}/PHY/CODING/viterbi.c ${OPENAIR1_DIR}/PHY/CODING/viterbi.c
......
This diff is collapsed.
This diff is collapsed.
...@@ -5,7 +5,7 @@ RATE12CC_SRC = ccoding_byte.c viterbi.c crc_byte.c ...@@ -5,7 +5,7 @@ RATE12CC_SRC = ccoding_byte.c viterbi.c crc_byte.c
all: 3gpplte_sse all: 3gpplte_sse
3gpplte_sse: $(TURBO_SRC) 3gpplte_sse: $(TURBO_SRC)
gcc -o 3gpplte_sse 3gpplte_sse.c -msse4 -Wall -g -ggdb -DMAIN gcc -o 3gpplte_sse 3gpplte_sse.c -msse4 -Wall -g -ggdb -DTC_MAIN -I../..
......
...@@ -483,6 +483,24 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ...@@ -483,6 +483,24 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y,
time_stats_t *intl1_stats, time_stats_t *intl1_stats,
time_stats_t *intl2_stats); time_stats_t *intl2_stats);
uint8_t phy_threegpplte_turbo_decoder16avx2(int16_t *y,
int16_t *y2,
uint8_t *decoded_bytes,
uint8_t *decoded_bytes2,
uint16_t n,
uint16_t interleaver_f1,
uint16_t interleaver_f2,
uint8_t max_iterations,
uint8_t crc_type,
uint8_t F,
time_stats_t *init_stats,
time_stats_t *alpha_stats,
time_stats_t *beta_stats,
time_stats_t *gamma_stats,
time_stats_t *ext_stats,
time_stats_t *intl1_stats,
time_stats_t *intl2_stats);
/*! /*!
\brief This routine performs max-logmap detection for the 3GPP turbo code (with termination). It is optimized for SIMD processing and 8-bit \brief This routine performs max-logmap detection for the 3GPP turbo code (with termination). It is optimized for SIMD processing and 8-bit
LLR arithmetic, and requires SSE2,SSSE3 and SSE4.1 (gcc >=4.3 and appropriate CPU) LLR arithmetic, and requires SSE2,SSSE3 and SSE4.1 (gcc >=4.3 and appropriate CPU)
......
...@@ -888,12 +888,16 @@ void phy_init_lte_top(LTE_DL_FRAME_PARMS *lte_frame_parms) ...@@ -888,12 +888,16 @@ void phy_init_lte_top(LTE_DL_FRAME_PARMS *lte_frame_parms)
ccodelte_init(); ccodelte_init();
ccodelte_init_inv(); ccodelte_init_inv();
treillis_table_init();
phy_generate_viterbi_tables(); phy_generate_viterbi_tables();
phy_generate_viterbi_tables_lte(); phy_generate_viterbi_tables_lte();
init_td8(); init_td8();
init_td16(); init_td16();
#ifdef __AVX2__
init_td16avx2();
#endif
lte_sync_time_init(lte_frame_parms); lte_sync_time_init(lte_frame_parms);
......
This diff is collapsed.
...@@ -203,8 +203,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -203,8 +203,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
multadd_complex_vector_real_scalar(dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1), multadd_complex_vector_real_scalar(dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1),
phy_vars_ue->ch_est_alpha,dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1), phy_vars_ue->ch_est_alpha,dl_ch-(phy_vars_ue->lte_frame_parms.ofdm_symbol_size<<1),
1,phy_vars_ue->lte_frame_parms.ofdm_symbol_size); 1,phy_vars_ue->lte_frame_parms.ofdm_symbol_size);
#ifdef DEBUG_CH
printf("k %d, first_carrier %d\n",k,phy_vars_ue->lte_frame_parms.first_carrier_offset);
#endif
if ((phy_vars_ue->lte_frame_parms.N_RB_DL==6) || if ((phy_vars_ue->lte_frame_parms.N_RB_DL==6) ||
(phy_vars_ue->lte_frame_parms.N_RB_DL==50) || (phy_vars_ue->lte_frame_parms.N_RB_DL==50) ||
(phy_vars_ue->lte_frame_parms.N_RB_DL==100)) { (phy_vars_ue->lte_frame_parms.N_RB_DL==100)) {
...@@ -213,7 +214,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -213,7 +214,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
// Treat first 2 pilots specially (left edge) // Treat first 2 pilots specially (left edge)
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #ifdef DEBUG_CH
printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(fl, multadd_real_vector_complex_scalar(fl,
ch, ch,
dl_ch, dl_ch,
...@@ -224,7 +227,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -224,7 +227,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #ifdef DEBUG_CH
printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2l2, multadd_real_vector_complex_scalar(f2l2,
ch, ch,
dl_ch, dl_ch,
...@@ -235,15 +240,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -235,15 +240,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
for (pilot_cnt=2; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-1); pilot_cnt+=2) { for (pilot_cnt=2; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-1); pilot_cnt+=2) {
// printf("%d\n",dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]);
// printf("pilot[%d][%d] (%d,%d)\n",p,pilot_cnt,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); //Re ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); //Re
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //Im ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //Im
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]); #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
dl_ch, dl_ch,
...@@ -254,13 +257,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -254,13 +257,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
rxF+=12; rxF+=12;
dl_ch+=8; dl_ch+=8;
// printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k+6, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]); #ifdef DEBUG_CH
printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
dl_ch, dl_ch,
...@@ -281,15 +282,17 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -281,15 +282,17 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))]; rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))];
#ifdef DEBUG_CH
printf("second half k %d\n",k);
#endif
for (pilot_cnt=0; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-3); pilot_cnt+=2) { for (pilot_cnt=0; pilot_cnt<((phy_vars_ue->lte_frame_parms.N_RB_DL)-3); pilot_cnt+=2) {
// printf("pilot[%d][%d] (%d,%d)\n",p,pilot_cnt,pil[0],pil[1]);
// printf("rx[%d] -> (%d,%d)\n", k+6, rxF[0], rxF[1]);
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH
// printf("**rb %d %d\n",rb,dl_ch-(int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]); printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f, multadd_real_vector_complex_scalar(f,
ch, ch,
dl_ch, dl_ch,
...@@ -300,8 +303,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -300,8 +303,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
#ifdef DEBUG_CH
// printf("**rb %d %d\n",rb,dl_ch-(int16_T *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]); printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2, multadd_real_vector_complex_scalar(f2,
ch, ch,
dl_ch, dl_ch,
...@@ -314,8 +318,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -314,8 +318,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 49: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #ifdef DEBUG_CH
printf("pilot %d: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(fr, multadd_real_vector_complex_scalar(fr,
ch, ch,
dl_ch, dl_ch,
...@@ -326,7 +331,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue, ...@@ -326,7 +331,9 @@ int lte_dl_channel_estimation(PHY_VARS_UE *phy_vars_ue,
ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15);
ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15);
// printf("pilot 50: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #ifdef DEBUG_CH
printf("pilot %d: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]);
#endif
multadd_real_vector_complex_scalar(f2r2, multadd_real_vector_complex_scalar(f2r2,
ch, ch,
dl_ch, dl_ch,
......
...@@ -52,8 +52,8 @@ ...@@ -52,8 +52,8 @@
int* sync_corr_ue0 = NULL; int* sync_corr_ue0 = NULL;
int* sync_corr_ue1 = NULL; int* sync_corr_ue1 = NULL;
int* sync_corr_ue2 = NULL; int* sync_corr_ue2 = NULL;
int sync_tmp[2048*4] __attribute__((aligned(16))); int sync_tmp[2048*4] __attribute__((aligned(32)));
short syncF_tmp[2048*2] __attribute__((aligned(16))); short syncF_tmp[2048*2] __attribute__((aligned(32)));
......
...@@ -56,8 +56,8 @@ void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq) ...@@ -56,8 +56,8 @@ void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq)
{ {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
UE_SCAN_INFO_t *scan_info = &ue->scan_info[band]; UE_SCAN_INFO_t *scan_info = &ue->scan_info[band];
int16_t spectrum[12288] __attribute__((aligned(16))); int16_t spectrum[12288] __attribute__((aligned(32)));
int16_t spectrum_p5ms[12288] __attribute__((aligned(16))); int16_t spectrum_p5ms[12288] __attribute__((aligned(32)));
int i,f,band_idx; int i,f,band_idx;
__m128i autocorr0[256/4],autocorr1[256/4],autocorr2[256/4]; __m128i autocorr0[256/4],autocorr1[256/4],autocorr2[256/4];
__m128i autocorr0_t[256/4],autocorr1_t[256/4],autocorr2_t[256/4]; __m128i autocorr0_t[256/4],autocorr1_t[256/4],autocorr2_t[256/4];
......
...@@ -61,21 +61,18 @@ void lte_gold(LTE_DL_FRAME_PARMS *frame_parms,uint32_t lte_gold_table[20][2][14] ...@@ -61,21 +61,18 @@ void lte_gold(LTE_DL_FRAME_PARMS *frame_parms,uint32_t lte_gold_table[20][2][14]
x2 = Ncp + x2 = Ncp +
(Nid_cell<<1) + (Nid_cell<<1) +
(((1+(Nid_cell<<1))*(1 + (((frame_parms->Ncp==0)?4:3)*l) + (7*(1+ns))))<<10); //cinit (((1+(Nid_cell<<1))*(1 + (((frame_parms->Ncp==0)?4:3)*l) + (7*(1+ns))))<<10); //cinit
//x2 = frame_parms->Ncp + (Nid_cell<<1) + (1+(Nid_cell<<1))*(1 + (3*l) + (7*(1+ns))); //cinit //x2 = frame_parms->Ncp + (Nid_cell<<1) + (1+(Nid_cell<<1))*(1 + (3*l) + (7*(1+ns))); //cinit
//n = 0 //n = 0
// printf("cinit (ns %d, l %d) => %d\n",ns,l,x2);
x1 = 1+ (1<<31); x1 = 1+ (1<<31);
x2=x2 ^ ((x2 ^ (x2>>1) ^ (x2>>2) ^ (x2>>3))<<31); x2=x2 ^ ((x2 ^ (x2>>1) ^ (x2>>2) ^ (x2>>3))<<31);
// skip first 50 double words (1600 bits) // skip first 50 double words (1600 bits)
//printf("n=0 : x1 %x, x2 %x\n",x1,x2);
for (n=1; n<50; n++) { for (n=1; n<50; n++) {
x1 = (x1>>1) ^ (x1>>4); x1 = (x1>>1) ^ (x1>>4);
x1 = x1 ^ (x1<<31) ^ (x1<<28); x1 = x1 ^ (x1<<31) ^ (x1<<28);
x2 = (x2>>1) ^ (x2>>2) ^ (x2>>3) ^ (x2>>4); x2 = (x2>>1) ^ (x2>>2) ^ (x2>>3) ^ (x2>>4);
x2 = x2 ^ (x2<<31) ^ (x2<<30) ^ (x2<<29) ^ (x2<<28); x2 = x2 ^ (x2<<31) ^ (x2<<30) ^ (x2<<29) ^ (x2<<28);
// printf("x1 : %x, x2 : %x\n",x1,x2);
} }
for (n=0; n<14; n++) { for (n=0; n<14; n++) {
...@@ -84,7 +81,6 @@ void lte_gold(LTE_DL_FRAME_PARMS *frame_parms,uint32_t lte_gold_table[20][2][14] ...@@ -84,7 +81,6 @@ void lte_gold(LTE_DL_FRAME_PARMS *frame_parms,uint32_t lte_gold_table[20][2][14]
x2 = (x2>>1) ^ (x2>>2) ^ (x2>>3) ^ (x2>>4); x2 = (x2>>1) ^ (x2>>2) ^ (x2>>3) ^ (x2>>4);
x2 = x2 ^ (x2<<31) ^ (x2<<30) ^ (x2<<29) ^ (x2<<28); x2 = x2 ^ (x2<<31) ^ (x2<<30) ^ (x2<<29) ^ (x2<<28);
lte_gold_table[ns][l][n] = x1^x2; lte_gold_table[ns][l][n] = x1^x2;
// printf("n=%d : c %x\n",n,x1^x2);
} }
} }
......
...@@ -117,7 +117,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint32_t Nsoft,uint8_ ...@@ -117,7 +117,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint32_t Nsoft,uint8_
dlsch->max_turbo_iterations = max_turbo_iterations; dlsch->max_turbo_iterations = max_turbo_iterations;
for (i=0; i<Mdlharq; i++) { for (i=0; i<Mdlharq; i++) {
// msg("new_ue_dlsch: Harq process %d\n",i); // printf("new_ue_dlsch: Harq process %d\n",i);
dlsch->harq_processes[i] = (LTE_DL_UE_HARQ_t *)malloc16(sizeof(LTE_DL_UE_HARQ_t)); dlsch->harq_processes[i] = (LTE_DL_UE_HARQ_t *)malloc16(sizeof(LTE_DL_UE_HARQ_t));
if (dlsch->harq_processes[i]) { if (dlsch->harq_processes[i]) {
...@@ -156,7 +156,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint32_t Nsoft,uint8_ ...@@ -156,7 +156,7 @@ LTE_UE_DLSCH_t *new_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint32_t Nsoft,uint8_
return(dlsch); return(dlsch);
} }
msg("new_ue_dlsch with size %zu: exit_flag = %u\n",sizeof(LTE_DL_UE_HARQ_t), exit_flag); printf("new_ue_dlsch with size %zu: exit_flag = %u\n",sizeof(LTE_DL_UE_HARQ_t), exit_flag);
free_ue_dlsch(dlsch); free_ue_dlsch(dlsch);
return(NULL); return(NULL);
...@@ -187,6 +187,27 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -187,6 +187,27 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
uint8_t crc_type; uint8_t crc_type;
#ifdef DEBUG_DLSCH_DECODING #ifdef DEBUG_DLSCH_DECODING
uint16_t i; uint16_t i;
#endif
#ifdef __AVX2__
int Kr_last,skipped_last=0;
uint8_t (*tc_2cw)(int16_t *y,
int16_t *y2,
uint8_t *,
uint8_t *,
uint16_t,
uint16_t,
uint16_t,
uint8_t,
uint8_t,
uint8_t,
time_stats_t *,
time_stats_t *,
time_stats_t *,
time_stats_t *,
time_stats_t *,
time_stats_t *,
time_stats_t *);
#endif #endif
uint8_t (*tc)(int16_t *y, uint8_t (*tc)(int16_t *y,
uint8_t *, uint8_t *,
...@@ -204,28 +225,35 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -204,28 +225,35 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
time_stats_t *, time_stats_t *,
time_stats_t *); time_stats_t *);
if (!dlsch_llr) { if (!dlsch_llr) {
msg("dlsch_decoding.c: NULL dlsch_llr pointer\n"); printf("dlsch_decoding.c: NULL dlsch_llr pointer\n");
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
if (!harq_process) { if (!harq_process) {
msg("dlsch_decoding.c: NULL harq_process pointer\n"); printf("dlsch_decoding.c: NULL harq_process pointer\n");
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
if (!frame_parms) { if (!frame_parms) {
msg("dlsch_decoding.c: NULL frame_parms pointer\n"); printf("dlsch_decoding.c: NULL frame_parms pointer\n");
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
if (subframe>9) { if (subframe>9) {
msg("dlsch_decoding.c: Illegal subframe index %d\n",subframe); printf("dlsch_decoding.c: Illegal subframe index %d\n",subframe);
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
if (llr8_flag == 0) if (llr8_flag == 0) {
#ifdef __AVX2__
tc_2cw = phy_threegpplte_turbo_decoder16avx2;
#endif
tc = phy_threegpplte_turbo_decoder16; tc = phy_threegpplte_turbo_decoder16;
}
else else
tc = phy_threegpplte_turbo_decoder8; tc = phy_threegpplte_turbo_decoder8;
...@@ -233,13 +261,13 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -233,13 +261,13 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
/* /*
if (nb_rb > frame_parms->N_RB_DL) { if (nb_rb > frame_parms->N_RB_DL) {
msg("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb); printf("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb);
return(max_turbo_iterations); return(max_turbo_iterations);
}*/ }*/
/*harq_pid = dlsch->current_harq_pid; /*harq_pid = dlsch->current_harq_pid;
if (harq_pid >= 8) { if (harq_pid >= 8) {
msg("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid); printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid);
return(max_turbo_iterations); return(max_turbo_iterations);
} }
*/ */
...@@ -254,7 +282,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -254,7 +282,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
G = harq_process->G; G = harq_process->G;
//get_G(frame_parms,nb_rb,dlsch->rb_alloc,mod_order,num_pdcch_symbols,phy_vars_ue->frame,subframe); //get_G(frame_parms,nb_rb,dlsch->rb_alloc,mod_order,num_pdcch_symbols,phy_vars_ue->frame,subframe);
// msg("DLSCH Decoding, harq_pid %d Ndi %d\n",harq_pid,harq_process->Ndi); // printf("DLSCH Decoding, harq_pid %d Ndi %d\n",harq_pid,harq_process->Ndi);
if (harq_process->round == 0) { if (harq_process->round == 0) {
// This is a new packet, so compute quantities regarding segmentation // This is a new packet, so compute quantities regarding segmentation
...@@ -273,7 +301,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -273,7 +301,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
/* /*
else { else {
msg("dlsch_decoding.c: Ndi>0 not checked yet!!\n"); printf("dlsch_decoding.c: Ndi>0 not checked yet!!\n");
return(max_turbo_iterations); return(max_turbo_iterations);
} }
*/ */
...@@ -300,10 +328,14 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -300,10 +328,14 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
break; break;
} }
if (harq_process->C >= MAX_NUM_DLSCH_SEGMENTS/bw_scaling) { if (harq_process->C > MAX_NUM_DLSCH_SEGMENTS/bw_scaling) {
LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling); LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling);
return((1+dlsch->max_turbo_iterations)); return((1+dlsch->max_turbo_iterations));
} }
#ifdef DEBUG_DLSCH_DECODING
printf("Segmentation: C %d, Cminus %d, Kminus %d, Kplus %d\n",harq_process->C,harq_process->Cminus,harq_process->Kminus,harq_process->Kplus);
#endif
for (r=0; r<harq_process->C; r++) { for (r=0; r<harq_process->C; r++) {
...@@ -324,7 +356,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -324,7 +356,7 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
else if (Kr_bytes <= 768) else if (Kr_bytes <= 768)
iind = 123 + ((Kr_bytes-256)>>3); iind = 123 + ((Kr_bytes-256)>>3);
else { else {
msg("dlsch_decoding: Illegal codeword size %d!!!\n",Kr_bytes); printf("dlsch_decoding: Illegal codeword size %d!!!\n",Kr_bytes);
return(dlsch->max_turbo_iterations); return(dlsch->max_turbo_iterations);
} }
...@@ -418,15 +450,12 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -418,15 +450,12 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
printf("\n"); printf("\n");
*/ */
//#ifndef __AVX2__
#if 1
if (err_flag == 0) { if (err_flag == 0) {
start_meas(dlsch_turbo_decoding_stats); start_meas(dlsch_turbo_decoding_stats);
#ifdef TURBO_S
ret = phy_threegpplte_turbo_decoder_scalar
#else
ret = tc ret = tc
#endif
(&harq_process->d[r][96], (&harq_process->d[r][96],
harq_process->c[r], harq_process->c[r],
Kr, Kr,
...@@ -446,7 +475,130 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue, ...@@ -446,7 +475,130 @@ uint32_t dlsch_decoding(PHY_VARS_UE *phy_vars_ue,
stop_meas(dlsch_turbo_decoding_stats); stop_meas(dlsch_turbo_decoding_stats);
} }
#else
if ((harq_process->C == 1) ||
((r==harq_process->C-1) && (skipped_last==0))) { // last segment with odd number of segments
start_meas(dlsch_turbo_decoding_stats);
ret = tc
(&harq_process->d[r][96],
harq_process->c[r],
Kr,
f1f2mat_old[iind*2],
f1f2mat_old[(iind*2)+1],
dlsch->max_turbo_iterations,
crc_type,
(r==0) ? harq_process->F : 0,
&phy_vars_ue->dlsch_tc_init_stats,
&phy_vars_ue->dlsch_tc_alpha_stats,
&phy_vars_ue->dlsch_tc_beta_stats,
&phy_vars_ue->dlsch_tc_gamma_stats,
&phy_vars_ue->dlsch_tc_ext_stats,
&phy_vars_ue->dlsch_tc_intl1_stats,
&phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1);
stop_meas(dlsch_turbo_decoding_stats);
// printf("single decode, exit\n");
// exit(-1);
}
else {
// we can merge code segments
if ((skipped_last == 0) && (r<harq_process->C-1)) {
skipped_last = 1;
Kr_last = Kr;
}
else {
skipped_last=0;
if (Kr_last == Kr) { // decode 2 code segments with AVX2 version
#ifdef DEBUG_DLSCH_DECODING
printf("single decoding segment %d (%p)\n",r-1,&harq_process->d[r-1][96]);
#endif
start_meas(dlsch_turbo_decoding_stats);
#ifdef DEBUG_DLSCH_DECODING
printf("double decoding segments %d,%d (%p,%p)\n",r-1,r,&harq_process->d[r-1][96],&harq_process->d[r][96]);
#endif
ret = tc_2cw
(&harq_process->d[r-1][96],
&harq_process->d[r][96],
harq_process->c[r-1],
harq_process->c[r],
Kr,
f1f2mat_old[iind*2],
f1f2mat_old[(iind*2)+1],
dlsch->max_turbo_iterations,
crc_type,
(r==0) ? harq_process->F : 0,
&phy_vars_ue->dlsch_tc_init_stats,
&phy_vars_ue->dlsch_tc_alpha_stats,
&phy_vars_ue->dlsch_tc_beta_stats,
&phy_vars_ue->dlsch_tc_gamma_stats,
&phy_vars_ue->dlsch_tc_ext_stats,
&phy_vars_ue->dlsch_tc_intl1_stats,
&phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1);
/*
ret = tc
(&harq_process->d[r-1][96],
harq_process->c[r-1],
Kr_last,
f1f2mat_old[iind*2],
f1f2mat_old[(iind*2)+1],
dlsch->max_turbo_iterations,
crc_type,
(r==0) ? harq_process->F : 0,
&phy_vars_ue->dlsch_tc_init_stats,
&phy_vars_ue->dlsch_tc_alpha_stats,
&phy_vars_ue->dlsch_tc_beta_stats,
&phy_vars_ue->dlsch_tc_gamma_stats,
&phy_vars_ue->dlsch_tc_ext_stats,
&phy_vars_ue->dlsch_tc_intl1_stats,
&phy_vars_ue->dlsch_tc_intl2_stats); //(is_crnti==0)?harq_pid:harq_pid+1);
exit(-1);*/
stop_meas(dlsch_turbo_decoding_stats);
}
else { // Kr_last != Kr
start_meas(dlsch_turbo_decoding_stats);
ret = tc
(&harq_process->d[r-1][96],
harq_process->c[r-1],
Kr_last,
f1f2mat_old[iind*2],
f1f2mat_old[(iind*2)+1],
dlsch->max_turbo_iterations,
crc_type,
(r==0) ? harq_process->F : 0,
&phy_vars_ue->dlsch_tc_init_stats,
&phy_vars_ue->dlsch_tc_alpha_stats,
&phy_vars_ue->dlsch_tc_beta_stats,