diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c index 7df5b232cc637d663d55ba1e22490aa365f386e9..5a7ca6d3cf89eaacd1f4c1acce7ba23d57be369e 100644 --- a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c @@ -813,6 +813,8 @@ void dlsch_channel_compensation(int **rxdataF_ext, PHY_MEASUREMENTS *phy_measurements) { +#if defined(__i386) || defined(__x86_64) + unsigned short rb; unsigned char aatx,aarx,symbol_mod,pilots=0; __m128i *dl_ch128,*dl_ch128_2,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128,*rho128; @@ -1059,8 +1061,208 @@ void dlsch_channel_compensation(int **rxdataF_ext, _mm_empty(); _m_empty(); + +#elif defined(__arm__) + + + unsigned short rb; + unsigned char aatx,aarx,symbol_mod,pilots=0; + + int16x4_t *dl_ch128,*dl_ch128_2,*rxdataF128,*rho128; + int32x4_t mmtmpD0,mmtmpD1; + int16x8_t *dl_ch_mag128,*dl_ch_mag128b,mmtmpD2,mmtmpD3,*rxdataF_comp128; + int16x4_t QAM_amp128,QAM_amp128b; + + int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1}; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + if (frame_parms->mode1_flag==1) // 10 out of 12 so don't reduce size + { nb_rb=1+(5*nb_rb/6); } + + else + { pilots=1; } + } + + for (aatx=0; aatx<frame_parms->nb_antennas_tx_eNB; aatx++) { + if (mod_order == 4) { + QAM_amp128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10) + QAM_amp128b = vmovq_n_s16(0); + + } else if (mod_order == 6) { + QAM_amp128 = vmovq_n_s16(QAM64_n1); // + QAM_amp128b = vmovq_n_s16(QAM64_n2); + } + + // printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + + + dl_ch128 = (int16x4_t*)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128 = (int16x8_t*)&dl_ch_mag[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128b = (int16x8_t*)&dl_ch_magb[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF128 = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128 = (int16x8_t*)&rxdataF_comp[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { + if (mod_order>2) { + // get channel amplitude if not QPSK + mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128[0]); + // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3]; + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift on 32-bits + mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128[1]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD2 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift on 16-bits + mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128[2]); + mmtmpD0 = 
vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128[3]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD3 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch128[4], dl_ch128[4]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + mmtmpD1 = vmull_s16(dl_ch128[5], dl_ch128[5]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD4 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + } + + dl_ch_mag128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128b); + dl_ch_mag128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128b); + dl_ch_mag128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128); + dl_ch_mag128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128); + + + if (pilots==0) { + dl_ch_mag128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128b); + dl_ch_mag128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128); + } + } + + mmtmpD0 = vmull_s16(dl_ch128[0], rx_dataF128[0]); + //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])] + mmtmpD1 = vmull_s16(dl_ch128[1], rx_dataF128[1]); + //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(ch[3])] + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + //mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])] + + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[0],*(int16x4_t*)conj)), rx_dataF128[0]); + //mmtmpD0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])] + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[1],*(int16x4_t*)conj)), rx_dataF128[1]); + //mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])] + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) 
-Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])] + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp128[0] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch128[2], rx_dataF128[2]); + mmtmpD1 = vmull_s16(dl_ch128[3], rx_dataF128[3]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[2],*(int16x4_t*)conj)), rx_dataF128[2]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[3],*(int16x4_t*)conj)), rx_dataF128[3]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp128[1] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch128[4], rx_dataF128[4]); + mmtmpD1 = vmull_s16(dl_ch128[5], rx_dataF128[5]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[4],*(int16x4_t*)conj)), rx_dataF128[4]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[5],*(int16x4_t*)conj)), rx_dataF128[5]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp128[2] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + dl_ch128+=6; + dl_ch_mag128+=3; + dl_ch_mag128b+=3; + rxdataF128+=6; + rxdataF_comp128+=3; + + } else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less + dl_ch128+=4; + dl_ch_mag128+=2; + dl_ch_mag128b+=2; + rxdataF128+=4; + rxdataF_comp128+=2; + } + } + } + } + + if (rho) { + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + rho128 = (int16x8_t*)&rho[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch128 = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch128_2 = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; 
+ + for (rb=0; rb<nb_rb; rb++) { + + mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128_2[0]); + mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128_2[1]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[0],*(int16x4_t*)conj)), dl_ch128_2[0]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[1],*(int16x4_t*)conj)), dl_ch128_2[1]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rho128[0] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128_2[2]); + mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128_2[3]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[2],*(int16x4_t*)conj)), dl_ch128_2[2]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[3],*(int16x4_t*)conj)), dl_ch128_2[3]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rho128[1] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128_2[0]); + mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128_2[1]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vrev32q_s16(vmul_s16(dl_ch128[4],*(int16x4_t*)conj), dl_ch128_2[4]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[5],*(int16x4_t*)conj)), dl_ch128_2[5]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rho128[2] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + dl_ch128+=6; + dl_ch128_2+=6; + rho128+=3; + } + + if (first_symbol_flag==1) { + phy_measurements->rx_correlation[0][aarx] = signal_energy(&rho[aarx][symbol*frame_parms->N_RB_DL*12],rb*12); + } + } + } +#endif } + + +#if defined(__x86_64__) || defined(__i386__) + void prec2A_TM56_128(unsigned char pmi,__m128i *ch0,__m128i *ch1) { @@ -1105,12 +1307,49 @@ void 
prec2A_TM56_128(unsigned char pmi,__m128i *ch0,__m128i *ch1)
   _m_empty();
 }
 
+#elif defined(__arm__)
+// NEON TM5/6 precoder: combines ch1[0] into ch0[0] according to pmi and scales by
+// ONE_OVER_SQRT2_Q15; ch1[0] is overwritten with its rotated copy for pmi 2/3.
+// NOTE(review): assumes interleaved re/im int16 pairs, so mask+vrev32 yields [-im,re]
+// (+j*ch1) and pmi 2 computes ch0-j*ch1; check against the "+1 +j" label / x86 code.
+void prec2A_TM56_128(unsigned char pmi,int16x8_t* ch0,int16x8_t* ch1) {
+  // private 128-bit sign mask: no `conj` is declared at this scope
+  static const int16_t conj[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1};
+  const int16x8_t conj128 = vld1q_s16(conj);
+  const int16x8_t amp = vmovq_n_s16(ONE_OVER_SQRT2_Q15);
+
+  switch (pmi) {
+  case 0 :   // +1 +1
+    ch0[0] = vqaddq_s16(ch0[0],ch1[0]);
+    break;
+  case 1 :   // +1 -1
+    ch0[0] = vqsubq_s16(ch0[0],ch1[0]);
+    break;
+  case 2 :   // +1 +j
+    ch1[0] = vrev32q_s16(vmulq_s16(ch1[0],conj128));
+    ch0[0] = vqsubq_s16(ch0[0],ch1[0]);
+    break;
+  case 3 :   // +1 -j
+    ch1[0] = vrev32q_s16(vmulq_s16(ch1[0],conj128));
+    ch0[0] = vqaddq_s16(ch0[0],ch1[0]);
+    break;
+  default :  // other pmi values: only the scaling below is applied
+    break;
+  }
+  // saturating Q15 multiply (2*a*b)>>16; replaces the non-existent vmulhq_s16
+  ch0[0] = vqdmulhq_s16(ch0[0],amp);
+}
+
+#endif
+
 // precoding is stream 0 .5(1,1) .5(1,-1) .5(1,1) .5(1,-1)
 // stream 1 .5(1,-1) .5(1,1) .5(1,-1) .5(1,1)
 // store "precoded" channel for stream 0 in ch0, stream 1 in ch1
 
 short TM3_prec[8]__attribute__((aligned(16))) = {1,1,-1,-1,1,1,-1,-1} ;
 
+#if defined(__x86_64__) || defined(__i386__)
+
 void prec2A_TM3_128(__m128i *ch0,__m128i *ch1)
 {
 
@@ -1144,25 +1383,44 @@ void prec2A_TM3_128(__m128i *ch0,__m128i *ch1)
   _m_empty();
 }
 
+#elif defined(__arm__)
+
+// NEON TM3 precoder: multiplies ch1[0] element-wise by TM3_prec, then replaces
+// ch0[0] / ch1[0] with the halving sum / difference (vhaddq_s16 / vhsubq_s16).
+void prec2A_TM3_128(int16x8_t* ch0, int16x8_t* ch1) {
+
+  const int16x8_t tmp0 = ch0[0];
+  // load the sign pattern with vld1q_s16 rather than casting the short array to a vector pointer
+  const int16x8_t tmp1 = vmulq_s16(ch1[0],vld1q_s16(TM3_prec));
+
+  ch0[0] = vhaddq_s16(tmp0,tmp1);
+  ch1[0] = vhsubq_s16(tmp0,tmp1);
+}
+
+#endif
+
 // pmi = 0 => stream 0 (1,1), stream 1 (1,-1)
 // pmi = 1 => stream 0 (1,j), stream 2 (1,-j)
 
+#if defined(__x86_64__) || defined(__i386__)
+
 void prec2A_TM4_128(int pmi,__m128i *ch0,__m128i *ch1)
 {
 
   __m128i amp;
   amp = _mm_set1_epi16(ONE_OVER_SQRT2_Q15);
-  __m128i tmp1;
+  __m128i tmp0,tmp1;
 
+  tmp0 = ch0[0];   // keep the unmodified ch0: both output streams are formed from it
   if (pmi == 0) {
-    ch0[0] = _mm_adds_epi16(ch0[0],ch1[0]);
-    ch1[0] = _mm_subs_epi16(ch0[0],ch1[0]);
+    ch0[0] = _mm_adds_epi16(tmp0,ch1[0]);
+    ch1[0] = _mm_subs_epi16(tmp0,ch1[0]);
   } else {
     tmp1
= _mm_sign_epi16(ch1[0],*(__m128i*)&conjugate[0]);
     tmp1 = _mm_shufflelo_epi16(tmp1,_MM_SHUFFLE(2,3,0,1));
     tmp1 = _mm_shufflehi_epi16(tmp1,_MM_SHUFFLE(2,3,0,1));
-    ch0[0] = _mm_subs_epi16(ch0[0],tmp1);
-    ch1[0] = _mm_subs_epi16(ch0[0],tmp1);
+    ch0[0] = _mm_adds_epi16(tmp0,tmp1);
+    ch1[0] = _mm_subs_epi16(tmp0,tmp1);
   }
 
   ch0[0] = _mm_mulhi_epi16(ch0[0],amp);
@@ -1172,6 +1428,34 @@ void prec2A_TM4_128(int pmi,__m128i *ch0,__m128i *ch1)
 
 }
 
+#elif defined(__arm__)
+
+// NEON TM4 precoder: ch0[0] <- (ch0+t)*amp and ch1[0] <- (ch0-t)*amp, where t is ch1[0]
+// for pmi 0, and otherwise ch1[0] times the {1,-1} mask with each 16-bit pair swapped.
+// Parameters are NEON vectors (the original ARM branch declared them as __m128i).
+void prec2A_TM4_128(int pmi,int16x8_t *ch0,int16x8_t *ch1)
+{
+  // private 128-bit sign mask: no `conj` is declared at this scope
+  static const int16_t conj[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1};
+  const int16x8_t amp  = vmovq_n_s16(ONE_OVER_SQRT2_Q15);
+  const int16x8_t tmp0 = ch0[0];   // saved first: ch0[0] is overwritten before ch1[0] is formed
+  int16x8_t tmp1;
+
+  if (pmi == 0) {
+    tmp1 = ch1[0];
+  } else {
+    tmp1 = vrev32q_s16(vmulq_s16(ch1[0],vld1q_s16(conj)));
+  }
+
+  ch0[0] = vqaddq_s16(tmp0,tmp1);
+  ch1[0] = vqsubq_s16(tmp0,tmp1);
+
+  // saturating Q15 multiply (2*a*b)>>16; replaces the non-existent vmulhq_s16
+  ch0[0] = vqdmulhq_s16(ch0[0],amp);
+  ch1[0] = vqdmulhq_s16(ch1[0],amp);
+}
+#endif
+
 void dlsch_channel_compensation_TM56(int **rxdataF_ext,
                                      int **dl_ch_estimates_ext,
                                      int **dl_ch_mag,
@@ -1188,6 +1468,8 @@ void dlsch_channel_compensation_TM56(int **rxdataF_ext,
                                      unsigned char dl_power_off)
 {
 
+#if defined(__x86_64__) || defined(__i386__)
+
   unsigned short rb,Nre;
   __m128i *dl_ch0_128,*dl_ch1_128,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128;
   unsigned char aarx=0,symbol_mod,pilots=0;
@@ -1375,6 +1657,169 @@ void dlsch_channel_compensation_TM56(int **rxdataF_ext,
 
   _mm_empty();
   _m_empty();
+
+#elif defined(__arm__)
+
+  unsigned short rb;
+  unsigned char aatx,aarx,symbol_mod,pilots=0;
+
+  int16x4_t *dl_ch128,*dl_ch128_2,*rxdataF128,*rho128;
+  int32x4_t mmtmpD0,mmtmpD1;
+  int16x8_t *dl_ch_mag128,*dl_ch_mag128b,mmtmpD2,mmtmpD3,*rxdataF_comp128;
+  int16x4_t QAM_amp128,QAM_amp128b;
+
+  int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1};
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ?
symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + if (frame_parms->mode1_flag==1) // 10 out of 12 so don't reduce size + { nb_rb=1+(5*nb_rb/6); } + + else + { pilots=1; } + } + + + if (mod_order == 4) { + QAM_amp128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10) + QAM_amp128b = vmovq_n_s16(0); + + } else if (mod_order == 6) { + QAM_amp128 = vmovq_n_s16(QAM64_n1); // + QAM_amp128b = vmovq_n_s16(QAM64_n2); + } + + // printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + + + dl_ch1_128 = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch2_128 = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128 = (int16x8_t*)&dl_ch_mag[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag128b = (int16x8_t*)&dl_ch_magb[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF128 = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp128 = (int16x8_t*)&rxdataF_comp[aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { +#ifdef DEBUG_DLSCH_DEMOD + printf("mode 6 prec: rb %d, pmi->%d\n",rb,pmi_ext[rb]); +#endif + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[1],&dl_ch1_128[1]); + + if (pilots==0) { + prec2A_TM56_128(pmi_ext[rb],&dl_ch0_128[2],&dl_ch1_128[2]); + } + + if (mod_order>2) { + // get channel amplitude if not QPSK + mmtmpD0 = vmull_s16(dl_ch128[0], dl_ch128[0]); + // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3]; + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift on 32-bits + mmtmpD1 = vmull_s16(dl_ch128[1], dl_ch128[1]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD2 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + // 
mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift on 16-bits + mmtmpD0 = vmull_s16(dl_ch128[2], dl_ch128[2]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + mmtmpD1 = vmull_s16(dl_ch128[3], dl_ch128[3]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD3 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch128[4], dl_ch128[4]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + mmtmpD1 = vmull_s16(dl_ch128[5], dl_ch128[5]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD4 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + } + + dl_ch_mag128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128b); + dl_ch_mag128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128b); + dl_ch_mag128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp128); + dl_ch_mag128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp128); + + + if (pilots==0) { + dl_ch_mag128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128b); + dl_ch_mag128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp128); + } + } + + mmtmpD0 = vmull_s16(dl_ch128[0], rx_dataF128[0]); + //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])] + mmtmpD1 = vmull_s16(dl_ch128[1], rx_dataF128[1]); + //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(ch[3])] + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + //mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])] + + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[0],*(int16x4_t*)conj)), rx_dataF128[0]); + //mmtmpD0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])] + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[1],*(int16x4_t*)conj)), 
rx_dataF128[1]); + //mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])] + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])] + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp128[0] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch128[2], rx_dataF128[2]); + mmtmpD1 = vmull_s16(dl_ch128[3], rx_dataF128[3]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[2],*(int16x4_t*)conj)), rx_dataF128[2]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[3],*(int16x4_t*)conj)), rx_dataF128[3]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp128[1] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch128[4], rx_dataF128[4]); + mmtmpD1 = vmull_s16(dl_ch128[5], rx_dataF128[5]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[4],*(int16x4_t*)conj)), rx_dataF128[4]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch128[5],*(int16x4_t*)conj)), rx_dataF128[5]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp128[2] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + dl_ch128+=6; + dl_ch_mag128+=3; + dl_ch_mag128b+=3; + rxdataF128+=6; + rxdataF_comp128+=3; + + } else { // we have a smaller PDSCH in symbols with pilots so skip last group of 4 REs and increment less + dl_ch128+=4; + dl_ch_mag128+=2; + dl_ch_mag128b+=2; + rxdataF128+=4; + rxdataF_comp128+=2; + } + } + + + + Nre = (pilots==0) ? 
12 : 8; + + precoded_signal_strength += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre], + (nb_rb*Nre))*rx_power_correction) - (phy_measurements->n0_power[aarx])); + // rx_antennas + } + phy_measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength,phy_measurements->n0_power_tot); + +#endif } void dlsch_channel_compensation_TM3(LTE_DL_FRAME_PARMS *frame_parms, @@ -1389,6 +1834,8 @@ void dlsch_channel_compensation_TM3(LTE_DL_FRAME_PARMS *frame_parms, unsigned char output_shift) { +#if defined(__x86_64__) || defined(__i386__) + unsigned short rb,Nre; __m128i *dl_ch0_128,*dl_ch1_128,*dl_ch_mag0_128,*dl_ch_mag1_128,*dl_ch_mag0_128b,*dl_ch_mag1_128b,*rxdataF128,*rxdataF_comp0_128,*rxdataF_comp1_128; unsigned char aarx=0,symbol_mod,pilots=0; @@ -1724,6 +2171,277 @@ void dlsch_channel_compensation_TM3(LTE_DL_FRAME_PARMS *frame_parms, _mm_empty(); _m_empty(); + +#elif defined(__arm__) + + unsigned short rb; + unsigned char aatx,aarx,symbol_mod,pilots=0; + + int16x4_t *dl_ch128,*dl_ch128_2,*rxdataF128; + int32x4_t mmtmpD0,mmtmpD1; + int16x8_t *dl_ch_mag0_128,*dl_ch_mag1_128b,mmtmpD2,mmtmpD3,*rxdataF_comp0_128,*rxdataF_comp1_128; + int16x4_t QAM_amp0_128,QAM_amp1_128b; + + int **rxdataF_ext = lte_ue_pdsch_vars->rxdataF_ext; + int **dl_ch_estimates_ext = lte_ue_pdsch_vars->dl_ch_estimates_ext; + int **dl_ch_mag0 = lte_ue_pdsch_vars->dl_ch_mag0; + int **dl_ch_mag1 = lte_ue_pdsch_vars->dl_ch_mag1; + int **dl_ch_magb0 = lte_ue_pdsch_vars->dl_ch_magb0; + int **dl_ch_magb1 = lte_ue_pdsch_vars->dl_ch_magb1; + int **rxdataF_comp0 = lte_ue_pdsch_vars->rxdataF_comp0; + int **rxdataF_comp1 = lte_ue_pdsch_vars->rxdataF_comp1[round]; //? + + int16_t conj[4]__attribute__((aligned(16))) = {1,-1,1,-1}; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? 
symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) { + if (frame_parms->mode1_flag==1) // 10 out of 12 so don't reduce size + { nb_rb=1+(5*nb_rb/6); } + + else + { pilots=1; } + } + + + if (mod_order0 == 4) { + QAM_amp0_128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10) + QAM_amp0_128b = vmovq_n_s16(0); + + } else if (mod_order0 == 6) { + QAM_amp0_128 = vmovq_n_s16(QAM64_n1); // + QAM_amp0_128b = vmovq_n_s16(QAM64_n2); + } + + if (mod_order1 == 4) { + QAM_amp1_128 = vmovq_n_s16(QAM16_n1); // 2/sqrt(10) + QAM_amp1_128b = vmovq_n_s16(0); + + } else if (mod_order1 == 6) { + QAM_amp1_128 = vmovq_n_s16(QAM64_n1); // + QAM_amp1_128b = vmovq_n_s16(QAM64_n2); + } + + // printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol); + + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + + + + dl_ch1_128 = (int16x4_t*)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch2_128 = (int16x4_t*)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag0_128 = (int16x8_t*)&dl_ch_mag0[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag0_128b = (int16x8_t*)&dl_ch_mag0b[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag1_128 = (int16x8_t*)&dl_ch_mag1[aarx][symbol*frame_parms->N_RB_DL*12]; + dl_ch_mag1_128b = (int16x8_t*)&dl_ch_mag1b[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF128 = (int16x4_t*)&rxdataF_ext[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp0_128 = (int16x8_t*)&rxdataF_comp0[aarx][symbol*frame_parms->N_RB_DL*12]; + rxdataF_comp1_128 = (int16x8_t*)&rxdataF_comp1[aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0; rb<nb_rb; rb++) { +#ifdef DEBUG_DLSCH_DEMOD + printf("mode 6 prec: rb %d, pmi->%d\n",rb,pmi_ext[rb]); +#endif + + prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]); + + if (pilots==0) { + prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]); + } + + + if (mod_order0>2) { + // get channel amplitude if not QPSK + mmtmpD0 = 
vmull_s16(dl_ch0_128[0], dl_ch0_128[0]); + // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3]; + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift on 32-bits + mmtmpD1 = vmull_s16(dl_ch0_128[1], dl_ch0_128[1]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD2 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift on 16-bits + mmtmpD0 = vmull_s16(dl_ch0_128[2], dl_ch0_128[2]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + mmtmpD1 = vmull_s16(dl_ch0_128[3], dl_ch0_128[3]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD3 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch0_128[4], dl_ch0_128[4]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + mmtmpD1 = vmull_s16(dl_ch0_128[5], dl_ch0_128[5]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD4 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + } + + dl_ch_mag0_128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp0_128b); + dl_ch_mag0_128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp0_128b); + dl_ch_mag0_128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp0_128); + dl_ch_mag0_128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp0_128); + + + if (pilots==0) { + dl_ch_mag0_128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp0_128b); + dl_ch_mag0_128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp0_128); + } + } + + if (mod_order1>2) { + // get channel amplitude if not QPSK + mmtmpD0 = vmull_s16(dl_ch1_128[0], dl_ch1_128[0]); + // mmtmpD0 = [ch0*ch0,ch1*ch1,ch2*ch2,ch3*ch3]; + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); 
+ // mmtmpD0 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3]>>output_shift on 32-bits + mmtmpD1 = vmull_s16(dl_ch1_128[1], dl_ch1_128[1]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD2 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + // mmtmpD2 = [ch0*ch0 + ch1*ch1,ch0*ch0 + ch1*ch1,ch2*ch2 + ch3*ch3,ch2*ch2 + ch3*ch3,ch4*ch4 + ch5*ch5,ch4*ch4 + ch5*ch5,ch6*ch6 + ch7*ch7,ch6*ch6 + ch7*ch7]>>output_shift on 16-bits + mmtmpD0 = vmull_s16(dl_ch1_128[2], dl_ch1_128[2]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + mmtmpD1 = vmull_s16(dl_ch1_128[3], dl_ch1_128[3]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD3 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch1_128[4], dl_ch1_128[4]); + mmtmpD0 = vqshlq_s32(vqaddq_s32(mmtmpD0,vrev64q_s32(mmtmpD0),-output_shift)); + mmtmpD1 = vmull_s16(dl_ch1_128[5], dl_ch1_128[5]); + mmtmpD1 = vqshlq_s32(vqaddq_s32(mmtmpD1,vrev64q_s32(mmtmpD1),-output_shift)); + mmtmpD4 = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + } + + dl_ch_mag1_128b[0] = vqdmulhq_s16(mmtmpD2,QAM_amp1_128b); + dl_ch_mag1_128b[1] = vqdmulhq_s16(mmtmpD3,QAM_amp1_128b); + dl_ch_mag1_128[0] = vqdmulhq_s16(mmtmpD2,QAM_amp1_128); + dl_ch_mag1_128[1] = vqdmulhq_s16(mmtmpD3,QAM_amp1_128); + + + if (pilots==0) { + dl_ch_mag1_128b[2] = vqdmulhq_s16(mmtmpD4,QAM_amp1_128b); + dl_ch_mag1_128[2] = vqdmulhq_s16(mmtmpD4,QAM_amp1_128); + } + } + + mmtmpD0 = vmull_s16(dl_ch0_128[0], rx_dataF128[0]); + //mmtmpD0 = [Re(ch[0])Re(rx[0]) Im(ch[0])Im(ch[0]) Re(ch[1])Re(rx[1]) Im(ch[1])Im(ch[1])] + mmtmpD1 = vmull_s16(dl_ch0_128[1], rx_dataF128[1]); + //mmtmpD1 = [Re(ch[2])Re(rx[2]) Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3]) Im(ch[3])Im(ch[3])] + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + //mmtmpD0 = [Re(ch[0])Re(rx[0])+Im(ch[0])Im(ch[0]) 
Re(ch[1])Re(rx[1])+Im(ch[1])Im(ch[1]) Re(ch[2])Re(rx[2])+Im(ch[2])Im(ch[2]) Re(ch[3])Re(rx[3])+Im(ch[3])Im(ch[3])] + + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[0],*(int16x4_t*)conj)), rx_dataF128[0]); + //mmtmpD0 = [-Im(ch[0])Re(rx[0]) Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1]) Re(ch[1])Im(rx[1])] + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[1],*(int16x4_t*)conj)), rx_dataF128[1]); + //mmtmpD0 = [-Im(ch[2])Re(rx[2]) Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3]) Re(ch[3])Im(rx[3])] + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + //mmtmpD1 = [-Im(ch[0])Re(rx[0])+Re(ch[0])Im(rx[0]) -Im(ch[1])Re(rx[1])+Re(ch[1])Im(rx[1]) -Im(ch[2])Re(rx[2])+Re(ch[2])Im(rx[2]) -Im(ch[3])Re(rx[3])+Re(ch[3])Im(rx[3])] + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp0_128[0] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch0_128[2], rx_dataF128[2]); + mmtmpD1 = vmull_s16(dl_ch0_128[3], rx_dataF128[3]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[2],*(int16x4_t*)conj)), rx_dataF128[2]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[3],*(int16x4_t*)conj)), rx_dataF128[3]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp0_128[1] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + mmtmpD0 = vmull_s16(dl_ch1_128[0], rx_dataF128[0]); + mmtmpD1 = vmull_s16(dl_ch1_128[1], rx_dataF128[1]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[0],*(int16x4_t*)conj)), rx_dataF128[0]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[1],*(int16x4_t*)conj)), rx_dataF128[1]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp1_128[0] = 
vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch1_128[2], rx_dataF128[2]); + mmtmpD1 = vmull_s16(dl_ch1_128[3], rx_dataF128[3]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[2],*(int16x4_t*)conj)), rx_dataF128[2]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[3],*(int16x4_t*)conj)), rx_dataF128[3]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp1_128[1] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + if (pilots==0) { + mmtmpD0 = vmull_s16(dl_ch0_128[4], rx_dataF128[4]); + mmtmpD1 = vmull_s16(dl_ch0_128[5], rx_dataF128[5]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[4],*(int16x4_t*)conj)), rx_dataF128[4]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch0_128[5],*(int16x4_t*)conj)), rx_dataF128[5]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp0_128[2] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + mmtmpD0 = vmull_s16(dl_ch1_128[4], rx_dataF128[4]); + mmtmpD1 = vmull_s16(dl_ch1_128[5], rx_dataF128[5]); + mmtmpD0 = vpadd_s32(mmtmpD0,mmtmpD1); + mmtmpD0 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch1_128[4],*(int16x4_t*)conj)), rx_dataF128[4]); + mmtmpD1 = vmull_s16(vrev32q_s16(vmulq_s16(dl_ch1_128[5],*(int16x4_t*)conj)), rx_dataF128[5]); + mmtmpD1 = vpadd_s32(mmtmpD0,mmtmpD1); + + mmtmpD0 = vqshlq_s32(mmtmpD0,-output_shift); + mmtmpD1 = vqshlq_s32(mmtmpD1,-output_shift); + rxdataF_comp1_128[2] = vcombine_s16(vqmovn_s32(mmtmpD0),vwmovn_s32(mmtmpD1)); + + + dl_ch0_128+=6; + dl_ch1_128+=6; + dl_ch_mag0_128+=3; + dl_ch_mag0_128b+=3; + dl_ch_mag1_128+=3; + dl_ch_mag1_128b+=3; + rxdataF128+=6; + rxdataF_comp0_128+=3; + rxdataF_comp1_128+=3; + + } else { // we have a smaller PDSCH in symbols with pilots 
so skip last group of 4 REs and increment less + dl_ch0_128+=4; + dl_ch1_128+=4; + dl_ch_mag0_128+=2; + dl_ch_mag0_128b+=2; + dl_ch_mag1_128+=2; + dl_ch_mag1_128b+=2; + rxdataF128+=4; + rxdataF_comp0_128+=2; + rxdataF_comp1_128+=2; + } + } + + + + Nre = (pilots==0) ? 12 : 8; + + precoded_signal_strength0 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*Nre], + (nb_rb*Nre))*rx_power_correction) - (phy_measurements->n0_power[aarx])); + + precoded_signal_strength1 += ((signal_energy_nodc(&dl_ch_estimates_ext[aarx+2][symbol*frame_parms->N_RB_DL*Nre], + (nb_rb*Nre))*rx_power_correction) - (phy_measurements->n0_power[aarx])); + + // rx_antennas + } + + phy_measurements->precoded_cqi_dB[eNB_id][0] = dB_fixed2(precoded_signal_strength0,phy_measurements->n0_power_tot); + phy_measurements->precoded_cqi_dB[eNB_id][1] = dB_fixed2(precoded_signal_strength1,phy_measurements->n0_power_tot); + +#endif } void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms, @@ -1735,6 +2453,8 @@ void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms, unsigned char output_shift) { +#if defined(__x86_64__)||defined(__i386__) + unsigned short rb; __m128i *dl_ch128,*dl_ch128i,*dl_ch_rho128,mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3; unsigned char aarx,symbol_mod,pilots=0; @@ -1824,6 +2544,10 @@ void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms, _mm_empty(); _m_empty(); + +#elif defined(__arm__) + +#endif } void dlsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, @@ -1840,6 +2564,8 @@ void dlsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, unsigned char dual_stream_UE) { +#if defined(__x86_64__)||defined(__i386__) + unsigned char aatx; int i; __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*rxdataF_comp128_i0,*rxdataF_comp128_i1,*dl_ch_mag128_0,*dl_ch_mag128_1,*dl_ch_mag128_0b,*dl_ch_mag128_1b,*rho128_0,*rho128_1,*rho128_i0,*rho128_i1, @@ -1898,6 +2624,10 @@ void dlsch_detection_mrc(LTE_DL_FRAME_PARMS *frame_parms, _mm_empty(); _m_empty(); + 
+#elif defined(__arm__) + +#endif } void dlsch_scale_channel(int **dl_ch_estimates_ext, @@ -1907,6 +2637,8 @@ void dlsch_scale_channel(int **dl_ch_estimates_ext, unsigned short nb_rb) { +#if defined(__x86_64__)||defined(__i386__) + short rb, ch_amp; unsigned char aatx,aarx,pilots=0,symbol_mod; __m128i *dl_ch128, ch_amp128; @@ -1949,6 +2681,9 @@ void dlsch_scale_channel(int **dl_ch_estimates_ext, } } } +#elif defined(__arm__) + +#endif } //compute average channel_level on each (TX,RX) antenna pair @@ -1959,6 +2694,8 @@ void dlsch_channel_level(int **dl_ch_estimates_ext, unsigned short nb_rb) { +#if defined(__x86_64__)||defined(__i386__) + short rb; unsigned char aatx,aarx,nre=12,symbol_mod; __m128i *dl_ch128,avg128D; @@ -2013,6 +2750,9 @@ void dlsch_channel_level(int **dl_ch_estimates_ext, _mm_empty(); _m_empty(); +#elif defined(__arm__) + +#endif } //compute average channel_level of effective (precoded) channel @@ -2022,6 +2762,7 @@ void dlsch_channel_level_TM3(int **dl_ch_estimates_ext, uint8_t symbol, unsigned short nb_rb) { +#if defined(__x86_64__)||defined(__i386__) short rb; unsigned char aarx,nre=12,symbol_mod; @@ -2089,6 +2830,10 @@ void dlsch_channel_level_TM3(int **dl_ch_estimates_ext, _mm_empty(); _m_empty(); + +#elif defined(__arm__) + +#endif } //compute average channel_level of effective (precoded) channel @@ -2100,6 +2845,8 @@ void dlsch_channel_level_TM56(int **dl_ch_estimates_ext, unsigned short nb_rb) { +#if defined(__x86_64__)||defined(__i386__) + short rb; unsigned char aarx,nre=12,symbol_mod; __m128i *dl_ch0_128,*dl_ch1_128, dl_ch0_128_tmp, dl_ch1_128_tmp,avg128D; @@ -2166,6 +2913,11 @@ void dlsch_channel_level_TM56(int **dl_ch_estimates_ext, _mm_empty(); _m_empty(); + +#elif defined(__arm__) + + +#endif } @@ -2177,6 +2929,7 @@ void dlsch_alamouti(LTE_DL_FRAME_PARMS *frame_parms, unsigned short nb_rb) { +#if defined(__x86_64__)||defined(__i386__) short *rxF0,*rxF1; __m128i *ch_mag0,*ch_mag1,*ch_mag0b,*ch_mag1b, amp, *rxF0_128; @@ -2257,30 
+3010,13 @@ void dlsch_alamouti(LTE_DL_FRAME_PARMS *frame_parms, _mm_empty(); _m_empty(); -} -void dlsch_antcyc(LTE_DL_FRAME_PARMS *frame_parms, - int **rxdataF_comp, - int **dl_ch_mag, - int **dl_ch_magb, - unsigned char symbol, - unsigned short nb_rb) -{ - - unsigned char rb,re; - int jj=1+(symbol*frame_parms->N_RB_DL*12); +#elif defined(__arm__) - // printf("Doing antcyc rx\n"); - for (rb=0; rb<nb_rb; rb++) { - for (re=0; re<12; re+=2) { - rxdataF_comp[0][jj] = rxdataF_comp[2][jj]; //copy odd carriers from tx antenna 1 - dl_ch_mag[0][jj] = dl_ch_mag[2][jj]; - dl_ch_magb[0][jj] = dl_ch_magb[2][jj]; - jj+=2; - } - } +#endif } + //============================================================================================== // Extraction functions //==============================================================================================