diff --git a/openair1/PHY/LTE_UE_TRANSPORT/transport_ue.h b/openair1/PHY/LTE_UE_TRANSPORT/transport_ue.h index 6b84a5f9693981e816767a68a8e09704d83895c3..b4ae44f6eab8b663a98de22b151338385c76bf33 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/transport_ue.h +++ b/openair1/PHY/LTE_UE_TRANSPORT/transport_ue.h @@ -155,8 +155,6 @@ typedef struct { uint8_t h[MAX_NUM_CHANNEL_BITS]; /// Scrambled "b"-sequences (for definition see 36-211 V8.6 2009-03, p.14) uint8_t b_tilde[MAX_NUM_CHANNEL_BITS]; - /// Modulated "d"-sequences (for definition see 36-211 V8.6 2009-03, p.14) - int32_t d[MAX_NUM_RE]; /// Transform-coded "z"-sequences (for definition see 36-211 V8.6 2009-03, p.14-15) int32_t z[MAX_NUM_RE]; /// "q" sequences for CQI/PMI (for definition see 36-212 V8.6 2009-03, p.27) diff --git a/openair1/PHY/LTE_UE_TRANSPORT/ulsch_modulation.c b/openair1/PHY/LTE_UE_TRANSPORT/ulsch_modulation.c index 7ec3e7bfb684dac869dc84bc04e4bb924995117b..6a663e89404e5a91a0187a20f52c4bf152863d16 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/ulsch_modulation.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/ulsch_modulation.c @@ -42,7 +42,7 @@ //#define DEBUG_ULSCH_MODULATION -void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb) +void dft_lte(int32_t *z,struct complex16 *input, int32_t Msc_PUSCH, uint8_t Nsymb) { #if defined(__x86_64__) || defined(__i386__) @@ -66,7 +66,7 @@ void dft_lte(int32_t *z,int32_t *d, int32_t Msc_PUSCH, uint8_t Nsymb) #endif // printf("Doing lte_dft for Msc_PUSCH %d\n",Msc_PUSCH); - d0 = (uint32_t *)d; + d0 = (uint32_t *)input; d1 = d0+Msc_PUSCH; d2 = d1+Msc_PUSCH; d3 = d2+Msc_PUSCH; @@ -476,7 +476,8 @@ void ulsch_modulation(int32_t **txdataF, // Modulation ulsch_Msymb = G/Q_m; - + /// Modulated "d"-sequences (for definition see 36-211 V8.6 2009-03, p.14) + struct complex16 d[MAX_NUM_RE] __attribute__((aligned(32))); if(ulsch->cooperation_flag == 2) // For Distributed Alamouti Scheme in Collabrative Communication { @@ -488,14 +489,14 @@ void ulsch_modulation(int32_t **txdataF, //UE1, -x1* - ((int16_t*)&ulsch->d[i])[0] = (ulsch->b_tilde[j] == 1) ? (gain_lin_QPSK) : -gain_lin_QPSK; - ((int16_t*)&ulsch->d[i])[1] = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK; + d[i].r = (ulsch->b_tilde[j] == 1) ? (gain_lin_QPSK) : -gain_lin_QPSK; + d[i].i = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK; // if (i<Msc_PUSCH) // printf("input %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]); // UE1, x0* - ((int16_t*)&ulsch->d[i+1])[0] = (ulsch->b_tilde[j-2] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK; - ((int16_t*)&ulsch->d[i+1])[1] = (ulsch->b_tilde[j-1] == 1)? (gain_lin_QPSK) : -gain_lin_QPSK; + d[i+1].r = (ulsch->b_tilde[j-2] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK; + d[i+1].i = (ulsch->b_tilde[j-1] == 1)? (gain_lin_QPSK) : -gain_lin_QPSK; break; @@ -521,8 +522,8 @@ void ulsch_modulation(int32_t **txdataF, qam16_table_offset_im+=1; - ((int16_t*)&ulsch->d[i])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); - ((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); + d[i].r =-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); + d[i].i =(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); //UE1,x0* qam16_table_offset_re = 0; @@ -544,8 +545,8 @@ void ulsch_modulation(int32_t **txdataF, // ((int16_t*)&ulsch->d[i+1])[0]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); // ((int16_t*)&ulsch->d[i+1])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); - ((int16_t*)&ulsch->d[i+1])[0]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); - ((int16_t*)&ulsch->d[i+1])[1]=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); + d[i+1].r=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); + d[i+1].i=-(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); break; @@ -578,8 +579,8 @@ void ulsch_modulation(int32_t **txdataF, qam64_table_offset_im+=1; - ((int16_t*)&ulsch->d[i])[0]=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); - ((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); + d[i].r=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); + d[i].i=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); //UE1,x0* qam64_table_offset_re = 0; @@ -605,8 +606,8 @@ void ulsch_modulation(int32_t **txdataF, qam64_table_offset_im+=1; - ((int16_t*)&ulsch->d[i+1])[0]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); - ((int16_t*)&ulsch->d[i+1])[1]=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); + d[i+1].r=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); + d[i+1].i=-(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); break; @@ -621,8 +622,8 @@ void ulsch_modulation(int32_t **txdataF, case 2: // TODO: this has to be updated!!! - ((int16_t*)&ulsch->d[i])[0] = (ulsch->b_tilde[j] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK; - ((int16_t*)&ulsch->d[i])[1] = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK; + d[i].r = (ulsch->b_tilde[j] == 1) ? (-gain_lin_QPSK) : gain_lin_QPSK; + d[i].i = (ulsch->b_tilde[j+1] == 1)? (-gain_lin_QPSK) : gain_lin_QPSK; // if (i<Msc_PUSCH) // printf("input %d/%d Msc_PUSCH %d (%p): %d,%d\n", i,Msymb,Msc_PUSCH,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]); @@ -646,8 +647,8 @@ void ulsch_modulation(int32_t **txdataF, qam16_table_offset_im+=1; - ((int16_t*)&ulsch->d[i])[0]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); - ((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); + d[i].r=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_re])>>15); + d[i].i=(int16_t)(((int32_t)amp*qam16_table[qam16_table_offset_im])>>15); // printf("input(16qam) %d (%p): %d,%d\n", i,&ulsch->d[i],((int16_t*)&ulsch->d[i])[0],((int16_t*)&ulsch->d[i])[1]); break; @@ -676,8 +677,8 @@ void ulsch_modulation(int32_t **txdataF, qam64_table_offset_im+=1; - ((int16_t*)&ulsch->d[i])[0]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); - ((int16_t*)&ulsch->d[i])[1]=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); + d[i].r=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_re])>>15); + d[i].i=(int16_t)(((int32_t)amp*qam64_table[qam64_table_offset_im])>>15); break; @@ -688,7 +689,7 @@ void ulsch_modulation(int32_t **txdataF, // Transform Precoding - dft_lte(ulsch->z,ulsch->d,Msc_PUSCH,ulsch->Nsymb_pusch); + dft_lte(ulsch->z,d,Msc_PUSCH,ulsch->Nsymb_pusch); DevAssert(txdataF); diff --git a/openair1/PHY/MODULATION/slot_fep.c b/openair1/PHY/MODULATION/slot_fep.c index 8f41735f10cfa5fd4a915a7d8cb825b2305c03ba..b2c7d934c6583c88acaf82f38a8f61d543b76b02 100644 --- a/openair1/PHY/MODULATION/slot_fep.c +++ b/openair1/PHY/MODULATION/slot_fep.c @@ -91,7 +91,7 @@ int slot_fep(PHY_VARS_UE *ue, } // subframe_offset_F = frame_parms->ofdm_symbol_size * frame_parms->symbols_per_tti * (Ns>>1); - + if (l<0 || l>=7-frame_parms->Ncp) { printf("slot_fep: l must be between 0 and %d\n",7-frame_parms->Ncp); return(-1); diff --git a/openair1/PHY/TOOLS/oai_dfts.c b/openair1/PHY/TOOLS/oai_dfts.c index c266a7f82bc41dfae2f9f1dee5b276709c842a94..c2b6a55210822fd4e066adf7888d4463309f164d 100644 --- a/openair1/PHY/TOOLS/oai_dfts.c +++ b/openair1/PHY/TOOLS/oai_dfts.c @@ -7082,43 +7082,40 @@ static inline void dft12f(simd_q15_t *x0, simd_q15_t tmp_dft12[12]; - simd_q15_t *tmp_dft12_ptr = &tmp_dft12[0]; - // msg("dft12\n"); bfly4_tw1(x0, x3, x6, x9, - tmp_dft12_ptr, - tmp_dft12_ptr+3, - tmp_dft12_ptr+6, - tmp_dft12_ptr+9); - + tmp_dft12, + tmp_dft12+3, + tmp_dft12+6, + tmp_dft12+9); bfly4_tw1(x1, x4, x7, x10, - tmp_dft12_ptr+1, - tmp_dft12_ptr+4, - tmp_dft12_ptr+7, - tmp_dft12_ptr+10); + tmp_dft12+1, + tmp_dft12+4, + tmp_dft12+7, + tmp_dft12+10); bfly4_tw1(x2, x5, x8, x11, - tmp_dft12_ptr+2, - tmp_dft12_ptr+5, - tmp_dft12_ptr+8, - tmp_dft12_ptr+11); + tmp_dft12+2, + tmp_dft12+5, + tmp_dft12+8, + tmp_dft12+11); // k2=0; - bfly3_tw1(tmp_dft12_ptr, - tmp_dft12_ptr+1, - tmp_dft12_ptr+2, + bfly3_tw1(tmp_dft12, + tmp_dft12+1, + tmp_dft12+2, y0, y4, y8); @@ -7126,9 +7123,9 @@ static inline void dft12f(simd_q15_t *x0, // k2=1; - bfly3(tmp_dft12_ptr+3, - tmp_dft12_ptr+4, - tmp_dft12_ptr+5, + bfly3(tmp_dft12+3, + tmp_dft12+4, + tmp_dft12+5, y1, y5, y9, @@ -7138,9 +7135,9 @@ static inline void dft12f(simd_q15_t *x0, // k2=2; - bfly3(tmp_dft12_ptr+6, - tmp_dft12_ptr+7, - tmp_dft12_ptr+8, + bfly3(tmp_dft12+6, + tmp_dft12+7, + tmp_dft12+8, y2, y6, y10, @@ -7148,9 +7145,9 @@ static inline void dft12f(simd_q15_t *x0, W4_12); // k2=3; - bfly3(tmp_dft12_ptr+9, - tmp_dft12_ptr+10, - tmp_dft12_ptr+11, + bfly3(tmp_dft12+9, + tmp_dft12+10, + tmp_dft12+11, y3, y7, y11, @@ -10607,11 +10604,19 @@ int dfts_autoinit(void) #ifndef MR_MAIN void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){ - AssertFatal((sizeidx>=0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx); - AssertFatal( ((intptr_t)output&0x1F)==0,"Buffers should be 32 bytes aligned %p",output); - if ((intptr_t)input&0x1F) { + AssertFatal((sizeidx >= 0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid dft size index %i\n",sizeidx); + int algn=0xF; + #ifdef __AVX2__ + if ( (dft_ftab[sizeidx].size%3) != 0 ) // there is no AVX2 implementation for multiples of 3 DFTs + algn=0x1F; + #endif + AssertFatal(((intptr_t)output&algn)==0,"Buffers should be aligned %p",output); + if (((intptr_t)input)&algn) { LOG_D(PHY, "DFT called with input not aligned, add a memcpy, size %d\n", sizeidx); - int16_t tmp[dft_ftab[sizeidx].size*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t) + int sz=dft_ftab[sizeidx].size; + if (sizeidx==DFT_12) // This case does 8 DFTs in // + sz*=8; + int16_t tmp[sz*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t) memcpy(tmp, input, sizeof tmp); dft_ftab[sizeidx].func(tmp,output,scale_flag); } else @@ -10620,10 +10625,15 @@ void dft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_fla void idft(uint8_t sizeidx, int16_t *input,int16_t *output,unsigned char scale_flag){ AssertFatal((sizeidx>=0 && sizeidx<DFT_SIZE_IDXTABLESIZE),"Invalid idft size index %i\n",sizeidx); - AssertFatal( ((intptr_t)output&0x1F)==0,"Buffers should be 32 bytes aligned %p",output); - if ((intptr_t)input&0x1F) { + int algn=0xF; + #ifdef __AVX2__ + algn=0x1F; + #endif + AssertFatal( ((intptr_t)output&algn)==0,"Buffers should be 16 bytes aligned %p",output); + if (((intptr_t)input)&algn ) { LOG_D(PHY, "DFT called with input not aligned, add a memcpy\n"); - int16_t tmp[idft_ftab[sizeidx].size*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t) + int sz=idft_ftab[sizeidx].size; + int16_t tmp[sz*2] __attribute__ ((aligned(32))); // input and output are not in right type (int16_t instead of c16_t) memcpy(tmp, input, sizeof tmp); dft_ftab[sizeidx].func(tmp,output,scale_flag); } else diff --git a/openair1/PHY/TOOLS/tools_defs.h b/openair1/PHY/TOOLS/tools_defs.h index 7cee04fc59b78659c8de7edbd4bf104dd9b8d152..86bce9614fd442ef47ad3670901c477c17d21378 100644 --- a/openair1/PHY/TOOLS/tools_defs.h +++ b/openair1/PHY/TOOLS/tools_defs.h @@ -253,7 +253,7 @@ This function performs optimized fixed-point radix-2 FFT/IFFT. SZ_DEF(73728) \ SZ_DEF(98304) -#define FOREACH_IDFTSZ(SZ_DEF) \ +#define FOREACH_IDFTSZ(SZ_DEF)\ SZ_DEF(64) \ SZ_DEF(128) \ SZ_DEF(256) \