diff --git a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c index c867292942f840d3bffd2891769ddf9b0d4dc7ca..e42f5f787c6fbb8feabe60f2cb4c58f875634d92 100644 --- a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c +++ b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c @@ -41,10 +41,10 @@ extern int16_t *ul_ref_sigs_rx[30][2][34]; int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms, L1_rxtx_proc_t *proc, - LTE_eNB_ULSCH_t * ulsch, - int32_t **ul_ch_estimates, - int32_t **ul_ch_estimates_time, - int32_t **rxdataF_ext, + LTE_eNB_ULSCH_t * ulsch, + int32_t **ul_ch_estimates, + int32_t **ul_ch_estimates_time, + int32_t **rxdataF_ext, module_id_t UE_id, unsigned char l, unsigned char Ns) { @@ -88,7 +88,7 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms, } uint16_t N_rb_alloc = ulsch->harq_processes[harq_pid]->nb_rb; - int32_t tmp_estimates[N_rb_alloc*12] __attribute__((aligned(16))); + int32_t tmp_estimates[N_rb_alloc*12] __attribute__((aligned(32))); Msc_RS = N_rb_alloc*12; cyclic_shift = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift + ulsch->harq_processes[harq_pid]->n_DMRS2 + @@ -334,14 +334,14 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms, current_phase2 = cmin(abs(current_phase2),127); // msg("sym: %d, current_phase1: %d, ru: %d + j%d, current_phase2: %d, ru: %d + j%d\n",k,current_phase1,ru1[2*current_phase1],ru1[2*current_phase1+1],current_phase2,ru2[2*current_phase2],ru2[2*current_phase2+1]); // rotate channel estimates by estimated phase - rotate_cpx_vector((int16_t *) ul_ch1, - &ru1[2*current_phase1], - (int16_t *) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k], + rotate_cpx_vector((c16_t *) ul_ch1, + (c16_t *)&ru1[2*current_phase1], + (c16_t *) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k], Msc_RS, 15); - rotate_cpx_vector((int16_t *) ul_ch2, - &ru2[2*current_phase2], - (int16_t *) &tmp_estimates[0], + rotate_cpx_vector((c16_t *) ul_ch2, + (c16_t *)&ru2[2*current_phase2], + (c16_t *) &tmp_estimates[0], Msc_RS, 15); // Combine the two rotated estimates @@ -657,14 +657,14 @@ int32_t lte_ul_channel_estimation_RRU(LTE_DL_FRAME_PARMS *frame_parms, current_phase2 = cmin(abs(current_phase2),127); // msg("sym: %d, current_phase1: %d, ru: %d + j%d, current_phase2: %d, ru: %d + j%d\n",k,current_phase1,ru1[2*current_phase1],ru1[2*current_phase1+1],current_phase2,ru2[2*current_phase2],ru2[2*current_phase2+1]); // rotate channel estimates by estimated phase - rotate_cpx_vector((int16_t *) ul_ch1, - &ru1[2*current_phase1], - (int16_t *) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k], + rotate_cpx_vector((c16_t *) ul_ch1, + (c16_t *)&ru1[2*current_phase1], + (c16_t *) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k], Msc_RS, 15); - rotate_cpx_vector((int16_t *) ul_ch2, - &ru2[2*current_phase2], - (int16_t *) &tmp_estimates[0], + rotate_cpx_vector((c16_t *) ul_ch2, + (c16_t *)&ru2[2*current_phase2], + (c16_t *) &tmp_estimates[0], Msc_RS, 15); // Combine the two rotated estimates diff --git a/openair1/PHY/MODULATION/ofdm_mod.c b/openair1/PHY/MODULATION/ofdm_mod.c index 2f075e8218d7da918b8826a6167014aef7e93d95..678eac2f3addfc0f4801743bc9c232bd0a268b34 100644 --- a/openair1/PHY/MODULATION/ofdm_mod.c +++ b/openair1/PHY/MODULATION/ofdm_mod.c @@ -360,9 +360,9 @@ void apply_nr_rotation(NR_DL_FRAME_PARMS *fp, symbol_rotation[2 * (sidx + first_symbol + symb_offset)], symbol_rotation[1 + 2 * (sidx + first_symbol + symb_offset)]); - rotate_cpx_vector(trxdata + (sidx * length * 2), - &symbol_rotation[2 * (sidx + first_symbol + symb_offset)], - trxdata + (sidx * length * 2), + rotate_cpx_vector((c16_t*)trxdata + (sidx * length * 2), + (c16_t*)&symbol_rotation[2 * (sidx + first_symbol + symb_offset)], + (c16_t*) trxdata + (sidx * length * 2), length, 15); } diff --git a/openair1/PHY/MODULATION/slot_fep_nr.c b/openair1/PHY/MODULATION/slot_fep_nr.c index 139b730ce1bb515046340fef8fc6394d5c22451a..5b0c4dc23a808fe0d5042b3d977447b0cceb1ead 100644 --- a/openair1/PHY/MODULATION/slot_fep_nr.c +++ b/openair1/PHY/MODULATION/slot_fep_nr.c @@ -98,18 +98,18 @@ int nr_slot_fep(PHY_VARS_NR_UE *ue, stop_meas(&ue->rx_dft_stats); int symb_offset = (Ns%frame_parms->slots_per_subframe)*frame_parms->symbols_per_slot; - int32_t rot2 = ((uint32_t*)frame_parms->symbol_rotation[0])[symbol+symb_offset]; - ((int16_t*)&rot2)[1]=-((int16_t*)&rot2)[1]; + c16_t rot2 = ((c16_t*)frame_parms->symbol_rotation[0])[symbol+symb_offset]; + rot2.i=-rot2.i; #ifdef DEBUG_FEP // if (ue->frame <100) printf("slot_fep: slot %d, symbol %d rx_offset %u, rotation symbol %d %d.%d\n", Ns,symbol, rx_offset, - symbol+symb_offset,((int16_t*)&rot2)[0],((int16_t*)&rot2)[1]); + symbol+symb_offset,rot2.r,rot2.i); #endif - rotate_cpx_vector((int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol], - (int16_t*)&rot2, - (int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol], + rotate_cpx_vector((c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol], + &rot2, + (c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol], frame_parms->ofdm_symbol_size, 15); @@ -214,18 +214,18 @@ int nr_slot_fep_init_sync(PHY_VARS_NR_UE *ue, stop_meas(&ue->rx_dft_stats); int symb_offset = (Ns%frame_parms->slots_per_subframe)*frame_parms->symbols_per_slot; - int32_t rot2 = ((uint32_t*)frame_parms->symbol_rotation[0])[symbol + symb_offset]; - ((int16_t*)&rot2)[1]=-((int16_t*)&rot2)[1]; + c16_t rot2 = ((c16_t*)frame_parms->symbol_rotation[0])[symbol + symb_offset]; + rot2.i=-rot2.i; #ifdef DEBUG_FEP // if (ue->frame <100) printf("slot_fep: slot %d, symbol %d rx_offset %u, rotation symbol %d %d.%d\n", Ns,symbol, rx_offset, - symbol+symb_offset,((int16_t*)&rot2)[0],((int16_t*)&rot2)[1]); + symbol+symb_offset,rot2.r,rot2.i); #endif - rotate_cpx_vector((int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol], - (int16_t*)&rot2, - (int16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol], + rotate_cpx_vector((c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol], + &rot2, + (c16_t *)&common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[aa][frame_parms->ofdm_symbol_size*symbol], frame_parms->ofdm_symbol_size, 15); } @@ -310,12 +310,12 @@ void apply_nr_rotation_ul(NR_DL_FRAME_PARMS *frame_parms, for (int symbol=first_symbol;symbol<nsymb;symbol++) { - uint32_t rot2 = ((uint32_t*)frame_parms->symbol_rotation[1])[symbol + symb_offset]; - ((int16_t*)&rot2)[1]=-((int16_t*)&rot2)[1]; - LOG_D(PHY,"slot %d, symb_offset %d rotating by %d.%d\n",slot,symb_offset,((int16_t*)&rot2)[0],((int16_t*)&rot2)[1]); - rotate_cpx_vector((int16_t *)&rxdataF[soffset+(frame_parms->ofdm_symbol_size*symbol)], - (int16_t*)&rot2, - (int16_t *)&rxdataF[soffset+(frame_parms->ofdm_symbol_size*symbol)], + c16_t rot2 = ((c16_t*)frame_parms->symbol_rotation[1])[symbol + symb_offset]; + rot2.i=-rot2.i; + LOG_D(PHY,"slot %d, symb_offset %d rotating by %d.%d\n",slot,symb_offset,rot2.r,rot2.i); + rotate_cpx_vector((c16_t *)&rxdataF[soffset+(frame_parms->ofdm_symbol_size*symbol)], + &rot2, + (c16_t *)&rxdataF[soffset+(frame_parms->ofdm_symbol_size*symbol)], length, 15); diff --git a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c index cd3b535b3fd8024b1a3040bd1d1ecd53823bb446..5404afd4321877a1bf85741eb3bf26d47b328f66 100644 --- a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c +++ b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c @@ -854,7 +854,7 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, uint32_t nb_re_pusch) { //#define DEBUG_UL_PTRS 1 - int16_t *phase_per_symbol = NULL; + c16_t *phase_per_symbol = NULL; int32_t *ptrs_re_symbol = NULL; int8_t ret = 0; @@ -871,20 +871,20 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, uint8_t *ptrsReOffset = &rel15_ul->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset; /* loop over antennas */ for (int aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { - phase_per_symbol = (int16_t*)gNB->pusch_vars[ulsch_id]->ptrs_phase_per_slot[aarx]; + phase_per_symbol = (c16_t*)gNB->pusch_vars[ulsch_id]->ptrs_phase_per_slot[aarx]; ptrs_re_symbol = &gNB->pusch_vars[ulsch_id]->ptrs_re_per_slot; *ptrs_re_symbol = 0; - phase_per_symbol[(2*symbol)+1] = 0; // Imag + phase_per_symbol[symbol].i = 0; /* set DMRS estimates to 0 angle with magnitude 1 */ if(is_dmrs_symbol(symbol,*dmrsSymbPos)) { /* set DMRS real estimation to 32767 */ - phase_per_symbol[2*symbol]=(int16_t)((1<<15)-1); // 32767 + phase_per_symbol[symbol].r=INT16_MAX; // 32767 #ifdef DEBUG_UL_PTRS - printf("[PHY][PTRS]: DMRS Symbol %d -> %4d + j*%4d\n", symbol, phase_per_symbol[2*symbol],phase_per_symbol[(2*symbol)+1]); + printf("[PHY][PTRS]: DMRS Symbol %d -> %4d + j*%4d\n", symbol, phase_per_symbol[symbol].r,phase_per_symbol[symbol].i); #endif } else {// real ptrs value is set to 0 - phase_per_symbol[2*symbol] = 0; // Real + phase_per_symbol[symbol].r = 0; } if(symbol == *startSymbIndex) { @@ -909,7 +909,7 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, symbol,frame_parms->ofdm_symbol_size, (int16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(symbol * nb_re_pusch)], gNB->nr_gold_pusch_dmrs[rel15_ul->scid][nr_tti_rx][symbol], - &phase_per_symbol[2* symbol], + (int16_t*)&phase_per_symbol[symbol], ptrs_re_symbol); } /* For last OFDM symbol at each antenna perform interpolation and compensation for the slot*/ @@ -919,7 +919,7 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, /*------------------------------------------------------------------------------------------------------- */ /* If L-PTRS is > 0 then we need interpolation */ if(*L_ptrs > 0) { - ret = nr_ptrs_process_slot(*dmrsSymbPos, *ptrsSymbPos, phase_per_symbol, *startSymbIndex, *nbSymb); + ret = nr_ptrs_process_slot(*dmrsSymbPos, *ptrsSymbPos, (int16_t*)phase_per_symbol, *startSymbIndex, *nbSymb); if(ret != 0) { LOG_W(PHY,"[PTRS] Compensation is skipped due to error in PTRS slot processing !!\n"); } @@ -938,11 +938,11 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, /* Skip rotation if the slot processing is wrong */ if((!is_dmrs_symbol(i,*dmrsSymbPos)) && (ret == 0)) { #ifdef DEBUG_UL_PTRS - printf("[PHY][UL][PTRS]: Rotate Symbol %2d with %d + j* %d\n", i, phase_per_symbol[2* i],phase_per_symbol[(2* i) +1]); + printf("[PHY][UL][PTRS]: Rotate Symbol %2d with %d + j* %d\n", i, phase_per_symbol[i].r,phase_per_symbol[i].i); #endif - rotate_cpx_vector((int16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)], - &phase_per_symbol[2* i], - (int16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)], + rotate_cpx_vector((c16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)], + &phase_per_symbol[i], + (c16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(i * rel15_ul->rb_size * NR_NB_SC_PER_RB)], ((*nb_rb) * NR_NB_SC_PER_RB), 15); }// if not DMRS Symbol }// symbol loop diff --git a/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c b/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c index 8936a69e70b764738ecb9cc818b1890116346282..ba7e61afe67b6972d542d35144c9ced773845b9c 100644 --- a/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c +++ b/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c @@ -1657,7 +1657,6 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue, RX_type_t rx_type) { //#define DEBUG_DL_PTRS 1 - int16_t *phase_per_symbol = NULL; int32_t *ptrs_re_symbol = NULL; int8_t ret = 0; /* harq specific variables */ @@ -1701,20 +1700,20 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue, } /* loop over antennas */ for (int aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { - phase_per_symbol = (int16_t*)pdsch_vars[gNB_id]->ptrs_phase_per_slot[aarx]; + c16_t *phase_per_symbol = (c16_t*)pdsch_vars[gNB_id]->ptrs_phase_per_slot[aarx]; ptrs_re_symbol = (int32_t*)pdsch_vars[gNB_id]->ptrs_re_per_slot[aarx]; ptrs_re_symbol[symbol] = 0; - phase_per_symbol[(2*symbol)+1] = 0; // Imag + phase_per_symbol[symbol].i = 0; // Imag /* set DMRS estimates to 0 angle with magnitude 1 */ if(is_dmrs_symbol(symbol,*dmrsSymbPos)) { /* set DMRS real estimation to 32767 */ - phase_per_symbol[2*symbol]=(int16_t)((1<<15)-1); // 32767 + phase_per_symbol[symbol].r=INT16_MAX; // 32767 #ifdef DEBUG_DL_PTRS - printf("[PHY][PTRS]: DMRS Symbol %d -> %4d + j*%4d\n", symbol, phase_per_symbol[2*symbol],phase_per_symbol[(2*symbol)+1]); + printf("[PHY][PTRS]: DMRS Symbol %d -> %4d + j*%4d\n", symbol, phase_per_symbol[symbol].r,phase_per_symbol[symbol].i); #endif } else { // real ptrs value is set to 0 - phase_per_symbol[2*symbol] = 0; // Real + phase_per_symbol[symbol].r = 0; // Real } if(dlsch0_harq->status == ACTIVE) { @@ -1740,7 +1739,7 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue, symbol,frame_parms->ofdm_symbol_size, (int16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(symbol * nb_re_pdsch)], ue->nr_gold_pdsch[gNB_id][nr_slot_rx][symbol][0], - &phase_per_symbol[2* symbol], + (int16_t*)&phase_per_symbol[symbol], &ptrs_re_symbol[symbol]); } }// HARQ 0 @@ -1752,7 +1751,7 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue, /*------------------------------------------------------------------------------------------------------- */ /* If L-PTRS is > 0 then we need interpolation */ if(*L_ptrs > 0) { - ret = nr_ptrs_process_slot(*dmrsSymbPos, *ptrsSymbPos, phase_per_symbol, *startSymbIndex, *nbSymb); + ret = nr_ptrs_process_slot(*dmrsSymbPos, *ptrsSymbPos, (int16_t*)phase_per_symbol, *startSymbIndex, *nbSymb); if(ret != 0) { LOG_W(PHY,"[PTRS] Compensation is skipped due to error in PTRS slot processing !!\n"); } @@ -1771,11 +1770,11 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue, /* Skip rotation if the slot processing is wrong */ if((!is_dmrs_symbol(i,*dmrsSymbPos)) && (ret == 0)) { #ifdef DEBUG_DL_PTRS - printf("[PHY][DL][PTRS]: Rotate Symbol %2d with %d + j* %d\n", i, phase_per_symbol[2* i],phase_per_symbol[(2* i) +1]); + printf("[PHY][DL][PTRS]: Rotate Symbol %2d with %d + j* %d\n", i, phase_per_symbol[i].r,phase_per_symbol[i].i); #endif - rotate_cpx_vector((int16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)], - &phase_per_symbol[2* i], - (int16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)], + rotate_cpx_vector((c16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)], + &phase_per_symbol[i], + (c16_t*)&pdsch_vars[gNB_id]->rxdataF_comp0[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)], ((*nb_rb) * NR_NB_SC_PER_RB), 15); }// if not DMRS Symbol }// symbol loop diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c index a07a19d085f43c0628416d699595fdaf09126d43..22d7d5689f60abe5734284b3f0ec406050d8e17a 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c +++ b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c @@ -597,17 +597,16 @@ uint8_t nr_ue_pusch_common_procedures(PHY_VARS_NR_UE *UE, int symb_offset = (slot%frame_parms->slots_per_subframe)*frame_parms->symbols_per_slot; for(ap = 0; ap < n_antenna_ports; ap++) { for (int s=0;s<NR_NUMBER_OF_SYMBOLS_PER_SLOT;s++){ - + c16_t rot=((c16_t*)frame_parms->symbol_rotation[1])[s + symb_offset]; LOG_D(PHY,"In %s: rotating txdataF symbol %d (%d) => (%d.%d)\n", - __FUNCTION__, - s, - s + symb_offset, - frame_parms->symbol_rotation[1][2 * (s + symb_offset)], - frame_parms->symbol_rotation[1][1 + (2 * (s + symb_offset))]); - - rotate_cpx_vector((int16_t *)&txdataF[ap][frame_parms->ofdm_symbol_size * s], - &frame_parms->symbol_rotation[1][2 * (s + symb_offset)], - (int16_t *)&txdataF[ap][frame_parms->ofdm_symbol_size * s], + __FUNCTION__, + s, + s + symb_offset, + rot.r, rot.i); + + rotate_cpx_vector((c16_t *)&txdataF[ap][frame_parms->ofdm_symbol_size * s], + &rot, + (c16_t *)&txdataF[ap][frame_parms->ofdm_symbol_size * s], frame_parms->ofdm_symbol_size, 15); } diff --git a/openair1/PHY/TOOLS/cmult_sv.c b/openair1/PHY/TOOLS/cmult_sv.c index 720b5da7e8154781fdd4c6e6c7a2b4e66d121da8..a9d502b9eb1b5d7a9ff0259030af7324389394c7 100644 --- a/openair1/PHY/TOOLS/cmult_sv.c +++ b/openair1/PHY/TOOLS/cmult_sv.c @@ -145,40 +145,9 @@ void multadd_real_four_symbols_vector_complex_scalar(int16_t *x, } #ifdef __AVX2__ -void rotate_cpx_vector(int16_t *x, - int16_t *alpha, - int16_t *y, - uint32_t N, - uint16_t output_shift) -{ - // multiply a complex vector with a complex value (alpha) - // stores result in y - // N is the number of complex numbers - // output_shift reduces the result of the multiplication by this number of bits - AssertFatal(N%8==0, "To be developped"); - const c16_t for_re={alpha[0], -alpha[1]}; - __m256i const alpha_for_real = _mm256_set1_epi32(*(uint32_t*)&for_re); - const c16_t for_im={alpha[1], alpha[0]}; - __m256i const alpha_for_im= _mm256_set1_epi32(*(uint32_t*)&for_im); - __m256i const perm_mask = - _mm256_set_epi8(31,30,23,22,29,28,21,20,27,26,19,18,25,24,17,16, - 15,14,7,6,13,12,5,4,11,10,3,2,9,8,1,0); - __m256i* xd= (__m256i*)x; - const __m256i *end=xd+N/8; - for( __m256i* yd = (__m256i *)y; xd<end ; yd++, xd++) { - const __m256i xre = _mm256_srai_epi32(_mm256_madd_epi16(*xd,alpha_for_real), - output_shift); - const __m256i xim = _mm256_srai_epi32(_mm256_madd_epi16(*xd,alpha_for_im), - output_shift); - // a bit faster than unpacklo+unpackhi+packs - const __m256i tmp=_mm256_packs_epi32(xre,xim); - *yd=_mm256_shuffle_epi8(tmp,perm_mask); - } -} -#else -void rotate_cpx_vector(int16_t *x, - int16_t *alpha, - int16_t *y, +void rotate_cpx_vector(c16_t *x, + c16_t *alpha, + c16_t *y, uint32_t N, uint16_t output_shift) { @@ -206,28 +175,28 @@ void rotate_cpx_vector(int16_t *x, __m128i shift = _mm_cvtsi32_si128(output_shift); register simd_q15_t m0,m1,m2,m3; - ((int16_t *)&alpha_128)[0] = alpha[0]; - ((int16_t *)&alpha_128)[1] = -alpha[1]; - ((int16_t *)&alpha_128)[2] = alpha[1]; - ((int16_t *)&alpha_128)[3] = alpha[0]; - ((int16_t *)&alpha_128)[4] = alpha[0]; - ((int16_t *)&alpha_128)[5] = -alpha[1]; - ((int16_t *)&alpha_128)[6] = alpha[1]; - ((int16_t *)&alpha_128)[7] = alpha[0]; + ((int16_t *)&alpha_128)[0] = alpha->r; + ((int16_t *)&alpha_128)[1] = -alpha->i; + ((int16_t *)&alpha_128)[2] = alpha->i; + ((int16_t *)&alpha_128)[3] = alpha->r; + ((int16_t *)&alpha_128)[4] = alpha->r; + ((int16_t *)&alpha_128)[5] = -alpha->i; + ((int16_t *)&alpha_128)[6] = alpha->i; + ((int16_t *)&alpha_128)[7] = alpha->r; #elif defined(__arm__) int32x4_t shift; int32x4_t ab_re0,ab_re1,ab_im0,ab_im1,re32,im32; int16_t reflip[8] __attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1}; int32x4x2_t xtmp; - ((int16_t *)&alpha_128)[0] = alpha[0]; - ((int16_t *)&alpha_128)[1] = alpha[1]; - ((int16_t *)&alpha_128)[2] = alpha[0]; - ((int16_t *)&alpha_128)[3] = alpha[1]; - ((int16_t *)&alpha_128)[4] = alpha[0]; - ((int16_t *)&alpha_128)[5] = alpha[1]; - ((int16_t *)&alpha_128)[6] = alpha[0]; - ((int16_t *)&alpha_128)[7] = alpha[1]; + ((int16_t *)&alpha_128)[0] = alpha->r; + ((int16_t *)&alpha_128)[1] = alpha->i; + ((int16_t *)&alpha_128)[2] = alpha->r; + ((int16_t *)&alpha_128)[3] = alpha->i; + ((int16_t *)&alpha_128)[4] = alpha->r; + ((int16_t *)&alpha_128)[5] = alpha->i; + ((int16_t *)&alpha_128)[6] = alpha->r; + ((int16_t *)&alpha_128)[7] = alpha->i; int16x8_t bflip = vrev32q_s16(alpha_128); int16x8_t bconj = vmulq_s16(alpha_128,*(int16x8_t *)reflip); shift = vdupq_n_s32(-output_shift); @@ -370,7 +339,7 @@ main () int16_t input[256] __attribute__((aligned(16))); int16_t input2[256] __attribute__((aligned(16))); int16_t output[256] __attribute__((aligned(16))); - int16_t alpha[2]; + c16_t alpha; int i; @@ -408,8 +377,8 @@ main () input2[14] = 1000; input2[15] = 2000; - alpha[0]=32767; - alpha[1]=0; + alpha->r=32767; + alpha->i=0; //mult_cpx_vector(input,input2,output,L,0); rotate_cpx_vector_norep(input,alpha,input,L,15); diff --git a/openair1/PHY/TOOLS/tools_defs.h b/openair1/PHY/TOOLS/tools_defs.h index 954301d938d8a686feff6c9e2ba5486352b4b217..18d1533166c066f67cc98860e13b341f0a7a303d 100644 --- a/openair1/PHY/TOOLS/tools_defs.h +++ b/openair1/PHY/TOOLS/tools_defs.h @@ -453,7 +453,7 @@ idft_size_idx_t get_idft(int ofdm_symbol_size) } -/*!\fn int32_t rotate_cpx_vector(int16_t *x,int16_t *alpha,int16_t *y,uint32_t N,uint16_t output_shift) +/*!\fn int32_t rotate_cpx_vector(c16_t *x,c16_t *alpha,c16_t *y,uint32_t N,uint16_t output_shift) This function performs componentwise multiplication of a vector with a complex scalar. @param x Vector input (Q1.15) in the format |Re0 Im0|,......,|Re(N-1) Im(N-1)| @param alpha Scalar input (Q1.15) in the format |Re0 Im0| @@ -463,11 +463,11 @@ This function performs componentwise multiplication of a vector with a complex s The function implemented is : \f$\mathbf{y} = \alpha\mathbf{x}\f$ */ -void rotate_cpx_vector(int16_t *x, - int16_t *alpha, - int16_t *y, - uint32_t N, - uint16_t output_shift); +void rotate_cpx_vector(c16_t *x, + c16_t *alpha, + c16_t *y, + uint32_t N, + uint16_t output_shift); //cadd_sv.c