diff --git a/common/utils/LOG/log.c b/common/utils/LOG/log.c index 4605df06e00a690895b90d6acc6c13cd62ff3e8b..043631d706a710f7384ba5897f5dc7160d6c68ec 100644 --- a/common/utils/LOG/log.c +++ b/common/utils/LOG/log.c @@ -48,6 +48,10 @@ // main log variables +// Fixme: a better place to be shure it is called +void read_cpu_hardware (void) __attribute__ ((constructor)); +void read_cpu_hardware (void) {__builtin_cpu_init(); } + log_mem_cnt_t log_mem_d[2]; int log_mem_flag=0; int log_mem_multi=1; diff --git a/common/utils/LOG/log.h b/common/utils/LOG/log.h index a1b70c7503f3aaed6f527cd79a21827da45adb6c..509c4581a92f3475a0f46f3324acb13a5fa3b1a3 100644 --- a/common/utils/LOG/log.h +++ b/common/utils/LOG/log.h @@ -366,6 +366,7 @@ typedef struct { #define MATLAB_CSHORT_BRACKET3 15 int32_t write_file_matlab(const char *fname, const char *vname, void *data, int length, int dec, unsigned int format, int multiVec); +#define write_output(a, b, c, d, e, f) write_file_matlab(a, b, c, d, e, f, 0) /*----------------macro definitions for reading log configuration from the config module */ #define CONFIG_STRING_LOG_PREFIX "log_config" diff --git a/common/utils/time_meas.c b/common/utils/time_meas.c index 9d1215b5e5b255e979401dc2fa8089c2bd903bcb..15766b5a3ff48a70b1c36b1746ce90143386f2e5 100644 --- a/common/utils/time_meas.c +++ b/common/utils/time_meas.c @@ -39,14 +39,14 @@ notifiedFIFO_t measur_fifo; double get_cpu_freq_GHz(void) { if (cpu_freq_GHz <1 ) { - time_stats_t ts = {0}; - reset_meas(&ts); - ts.trials++; - ts.in = rdtsc_oai(); - sleep(1); - ts.diff = (rdtsc_oai()-ts.in); - cpu_freq_GHz = (double)ts.diff/1000000000; - printf("CPU Freq is %f \n", cpu_freq_GHz); + time_stats_t ts = {0}; + reset_meas(&ts); + ts.trials++; + ts.in = rdtsc_oai(); + sleep(1); + ts.diff = (rdtsc_oai()-ts.in); + cpu_freq_GHz = (double)ts.diff/1000000000; + printf("CPU Freq is %f \n", cpu_freq_GHz); } return cpu_freq_GHz; } diff --git a/openair1/PHY/INIT/lte_init.c b/openair1/PHY/INIT/lte_init.c index c92d926a8f321f18037392a5659ab60dc9eb605c..8e351a15f20edd1c3bf5294deb2e461ed3b84a55 100644 --- a/openair1/PHY/INIT/lte_init.c +++ b/openair1/PHY/INIT/lte_init.c @@ -475,9 +475,7 @@ int phy_init_lte_eNB(PHY_VARS_eNB *eNB, srs_vars[srs_id].srs = (int32_t *) malloc16_clear (2 * fp->ofdm_symbol_size * sizeof (int32_t)); } - LOG_I(PHY,"PRACH allocation\n"); // PRACH - prach_vars->prachF = (int16_t *) malloc16_clear (1024 * 2 * sizeof (int16_t)); // assume maximum of 64 RX antennas for PRACH receiver prach_vars->prach_ifft[0] = (int32_t **) malloc16_clear (64 * sizeof (int32_t *)); @@ -485,8 +483,6 @@ int phy_init_lte_eNB(PHY_VARS_eNB *eNB, prach_vars->prach_ifft[0][i] = (int32_t *) malloc16_clear (1024 * 2 * sizeof (int32_t)); prach_vars->rxsigF[0] = (int16_t **) malloc16_clear (64 * sizeof (int16_t *)); - // PRACH BR - prach_vars_br->prachF = (int16_t *)malloc16_clear( 1024*2*sizeof(int32_t) ); // assume maximum of 64 RX antennas for PRACH receiver for (int ce_level = 0; ce_level < 4; ce_level++) { @@ -576,8 +572,6 @@ void phy_free_lte_eNB(PHY_VARS_eNB *eNB) { for (UE_id=0; UE_id<NUMBER_OF_SRS_MAX; UE_id++) free_and_zero(srs_vars[UE_id].srs); - free_and_zero(prach_vars->prachF); - for (i = 0; i < 64; i++) free_and_zero(prach_vars->prach_ifft[0][i]); free_and_zero(prach_vars->prach_ifft[0]); @@ -589,7 +583,6 @@ void phy_free_lte_eNB(PHY_VARS_eNB *eNB) { free_and_zero(prach_vars->rxsigF[ce_level]); } - free_and_zero(prach_vars_br->prachF); free_and_zero(prach_vars->rxsigF[0]); for (int ULSCH_id=0; ULSCH_id<NUMBER_OF_ULSCH_MAX; ULSCH_id++) 
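The constructor added in log.c above guarantees that __builtin_cpu_init() runs before main(), so later __builtin_cpu_supports() checks see an initialized CPU-feature table. A minimal stand-alone sketch of that GCC constructor pattern (hypothetical file, not part of the patch):

#include <stdio.h>

/* Runs before main(), like the read_cpu_hardware() constructor in log.c. */
static void init_cpu_features(void) __attribute__((constructor));
static void init_cpu_features(void)
{
  __builtin_cpu_init();  /* populate GCC's model of the host CPU */
}

int main(void)
{
  /* Valid only after __builtin_cpu_init() has run. */
  printf("AVX2 supported: %d\n", __builtin_cpu_supports("avx2"));
  return 0;
}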
{ diff --git a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c index 9327fe9fe89a255249794470e4160ec440732406..45c25b59ea11187be4b57d4b08aca88a3da810c2 100644 --- a/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c +++ b/openair1/PHY/LTE_ESTIMATION/lte_ul_channel_estimation.c @@ -106,9 +106,9 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms, #ifdef DEBUG_CH if (Ns==0) - LOG_M("drs_seq0.m","drsseq0",ul_ref_sigs_rx[u][v][Msc_RS_idx],2*Msc_RS,2,1); + LOG_M("drs_seq0.m","drsseq0",ul_ref_sigs_rx[u][v][Msc_RS_idx],2*Msc_RS,2,0); else - LOG_M("drs_seq1.m","drsseq1",ul_ref_sigs_rx[u][v][Msc_RS_idx],2*Msc_RS,2,1); + LOG_M("drs_seq1.m","drsseq1",ul_ref_sigs_rx[u][v][Msc_RS_idx],2*Msc_RS,2,0); #endif @@ -311,7 +311,7 @@ int32_t lte_ul_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms, // alpha = (int16_t) (((int32_t) SCALE * (int32_t) (pilot_pos2-k))/(pilot_pos2-pilot_pos1)); // beta = (int16_t) (((int32_t) SCALE * (int32_t) (k-pilot_pos1))/(pilot_pos2-pilot_pos1)); #ifdef DEBUG_CH - LOG_D(PHY,"lte_ul_channel_estimation: k=%d, alpha = %d, beta = %d\n",k,alpha,beta); + LOG_D(PHY,"lte_ul_channel_estimation: k=%d\n",k); #endif //symbol_offset_subframe = frame_parms->N_RB_UL*12*k; @@ -436,9 +436,9 @@ int32_t lte_ul_channel_estimation_RRU(LTE_DL_FRAME_PARMS *frame_parms, #ifdef DEBUG_CH if (l==pilot_pos1) - write_output("drs_seq0.m","drsseq0",ul_ref_sigs_rx[u][v][Msc_RS_idx],Msc_RS,1,1); + write_output("drs_seq0.m","drsseq0",ul_ref_sigs_rx[u][v][Msc_RS_idx],Msc_RS,1,0); else - write_output("drs_seq1.m","drsseq1",ul_ref_sigs_rx[u][v][Msc_RS_idx],Msc_RS,1,1); + write_output("drs_seq1.m","drsseq1",ul_ref_sigs_rx[u][v][Msc_RS_idx],Msc_RS,1,0); #endif symbol_offset = frame_parms->N_RB_UL*12*l; diff --git a/openair1/PHY/LTE_TRANSPORT/phich.c b/openair1/PHY/LTE_TRANSPORT/phich.c index 1725588f4c228f0ebdabb87926afeff7efce8969..944e8fe18f136fda9ba2007d073bb712547c527d 100644 --- a/openair1/PHY/LTE_TRANSPORT/phich.c +++ b/openair1/PHY/LTE_TRANSPORT/phich.c @@ -74,7 +74,8 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, uint32_t subframe_offset=((frame_parms->Ncp==0)?14:12)*frame_parms->ofdm_symbol_size*subframe; memset(d,0,24*sizeof(int16_t)); - + const int nushift=frame_parms->nushift; + if (frame_parms->nb_antenna_ports_eNB==1) gain_lin_QPSK = (int16_t)(((int32_t)amp*ONE_OVER_SQRT2_Q15)>>15); else @@ -245,7 +246,7 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, y1_16[7] = -y0_16[5]; for (i=0,j=0,m=0; i<6; i++,j+=2) { - if ((i!=(frame_parms->nushift))&&(i!=(frame_parms->nushift+3))) { + if ((i!=(nushift))&&(i!=((nushift+3)%6))) { y0[j] += y0_16[m]; y1[j] += y1_16[m++]; y0[j+1] += y0_16[m]; @@ -287,7 +288,7 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, y1_16[7] = -y0_16[5]; for (i=0,j=0,m=0; i<6; i++,j+=2) { - if ((i!=(frame_parms->nushift))&&(i!=(frame_parms->nushift+3))) { + if ((i!=(nushift))&&(i!=((nushift+3)%6))) { y0[j] += y0_16[m]; y1[j] += y1_16[m++]; y0[j+1] += y0_16[m]; @@ -329,7 +330,7 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, y1_16[7] = -y0_16[5]; for (i=0,j=0,m=0; i<6; i++,j+=2) { - if ((i!=(frame_parms->nushift))&&(i!=(frame_parms->nushift+3))) { + if ((i!=(nushift))&&(i!=((nushift+3)%6))) { y0[j] += y0_16[m]; y1[j] += y1_16[m++]; y0[j+1] += y0_16[m]; @@ -360,7 +361,7 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, y0_16[7] = d[7]*gain_lin_QPSK; for (i=0,j=0,m=0; i<6; i++,j+=2) { - if ((i!=(frame_parms->nushift))&&(i!=(frame_parms->nushift+3))) { + if 
((i!=(nushift))&&(i!=((nushift+3)%6))) { y0[j] += y0_16[m++]; y0[j+1] += y0_16[m++]; } @@ -385,7 +386,7 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, y0_16[7] = d[15]*gain_lin_QPSK; for (i=0,j=0,m=0; i<6; i++,j+=2) { - if ((i!=(frame_parms->nushift))&&(i!=(frame_parms->nushift+3))) { + if ((i!=(nushift))&&(i!=((nushift+3)%6))) { y0[j] += y0_16[m++]; y0[j+1] += y0_16[m++]; } @@ -410,7 +411,7 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, y0_16[7] = d[23]*gain_lin_QPSK; for (i=0,j=0,m=0; i<6; i++,j+=2) { - if ((i!=(frame_parms->nushift))&&(i!=(frame_parms->nushift+3))) { + if ((i!=(nushift))&&(i!=((nushift+3)%6))) { y0[j] += y0_16[m++]; y0[j+1] += y0_16[m++]; } @@ -533,7 +534,7 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, y1_16[7] = -y0_16[5]; for (i=0,j=0,m=0; i<6; i++,j+=2) { - if ((i!=(frame_parms->nushift))&&(i!=(frame_parms->nushift+3))) { + if ((i!=(nushift))&&(i!=((nushift+3)%6))) { y0[j] += y0_16[m]; y1[j] += y1_16[m++]; y0[j+1] += y0_16[m]; @@ -644,7 +645,7 @@ void generate_phich(LTE_DL_FRAME_PARMS *frame_parms, y0_16[7] = d[7]*gain_lin_QPSK; for (i=0,j=0,m=0; i<6; i++,j+=2) { - if ((i!=(frame_parms->nushift))&&(i!=(frame_parms->nushift+3))) { + if ((i!=(nushift))&&(i!=((nushift+3)%6))) { y0[j] += y0_16[m++]; y0[j+1] += y0_16[m++]; } diff --git a/openair1/PHY/LTE_TRANSPORT/prach.c b/openair1/PHY/LTE_TRANSPORT/prach.c index 91c419ebe39495931098001711d00ef45af97b28..6a989edc4d23a01e34fce301e11a9d55139e8ea6 100644 --- a/openair1/PHY/LTE_TRANSPORT/prach.c +++ b/openair1/PHY/LTE_TRANSPORT/prach.c @@ -62,9 +62,7 @@ void rx_prach0(PHY_VARS_eNB *eNB, uint8_t Ncs_config; uint8_t restricted_set; uint8_t n_ra_prb; - int16_t *prachF=NULL; int16_t **rxsigF=NULL; - int16_t *prach2; uint8_t preamble_index; uint16_t NCS,NCS2; uint16_t preamble_offset=0,preamble_offset_old; @@ -77,12 +75,10 @@ void rx_prach0(PHY_VARS_eNB *eNB, uint8_t not_found; int k=0; uint16_t u; - int16_t *Xu=0; - uint16_t offset; + c16_t *Xu=0; int16_t Ncp; uint16_t first_nonzero_root_idx=0; uint8_t new_dft=0; - uint8_t aa; int32_t lev; int16_t levdB; int fft_size=0,log2_ifft_size; @@ -130,14 +126,11 @@ void rx_prach0(PHY_VARS_eNB *eNB, tdd_mapindex,Nf); } - int16_t *prach[nb_rx]; uint8_t prach_fmt = get_prach_fmt(prach_ConfigIndex,frame_type); uint16_t N_ZC = (prach_fmt <4)?839:139; - if (eNB) { if (br_flag == 1) { prach_ifftp = eNB->prach_vars_br.prach_ifft[ce_level]; - prachF = eNB->prach_vars_br.prachF; rxsigF = eNB->prach_vars_br.rxsigF[ce_level]; if (LOG_DEBUGFLAG(PRACH)) { @@ -151,7 +144,6 @@ void rx_prach0(PHY_VARS_eNB *eNB, } } else { prach_ifftp = eNB->prach_vars.prach_ifft[0]; - prachF = eNB->prach_vars.prachF; rxsigF = eNB->prach_vars.rxsigF[0]; //if (LOG_DEBUGFLAG(PRACH)) { @@ -179,7 +171,8 @@ void rx_prach0(PHY_VARS_eNB *eNB, AssertFatal(ru!=NULL,"ru is null\n"); int8_t dBEn0=0; - for (aa=0; aa<nb_rx; aa++) { + int16_t *prach[nb_rx]; + for (int aa=0; aa<nb_rx; aa++) { if (ru->if_south == LOCAL_RF || ru->function == NGFI_RAU_IF5) { // set the time-domain signal if we have to use it in this node // DJP - indexing below in subframe zero takes us off the beginning of the array??? 
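The repeated phich.c change above replaces nushift+3 with (nushift+3)%6: nushift takes values 0..5, so for nushift >= 3 the second cell-specific reference-signal position lies outside 0..5 unless it wraps inside the 6-subcarrier group, and the old test excluded only one RE. A small illustrative program (assuming only that nushift is in 0..5 and two positions per group are excluded):

#include <stdio.h>

int main(void)
{
  for (int nushift = 0; nushift < 6; nushift++) {
    printf("nushift=%d excluded REs:", nushift);
    for (int i = 0; i < 6; i++)
      if (i == nushift || i == (nushift + 3) % 6)  /* old code tested i != nushift+3 without the wrap */
        printf(" %d", i);
    printf("\n");
  }
  return 0;
}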
prach[aa] = (int16_t *)&ru->common.rxdata[aa][(subframe*fp->samples_per_tti)-ru->N_TA_offset]; @@ -291,9 +284,9 @@ void rx_prach0(PHY_VARS_eNB *eNB, LOG_D(PHY,"rx_prach: Doing FFT for N_RB_UL %d nb_rx:%d Ncp:%d\n",fp->N_RB_UL, nb_rx, Ncp); } - for (aa=0; aa<nb_rx; aa++) { + for (int aa=0; aa<nb_rx; aa++) { AssertFatal(prach[aa]!=NULL,"prach[%d] is null\n",aa); - prach2 = prach[aa] + (Ncp<<1); + int16_t *prach2 = prach[aa] + (Ncp<<1); // <<1 because type int16 but input is c16 // do DFT switch (fp->N_RB_UL) { @@ -562,37 +555,38 @@ void rx_prach0(PHY_VARS_eNB *eNB, if (new_dft == 1) { new_dft = 0; - + if (br_flag == 1) { - Xu=(int16_t *)eNB->X_u_br[ce_level][preamble_offset-first_nonzero_root_idx]; + Xu=(c16_t *)eNB->X_u_br[ce_level][preamble_offset-first_nonzero_root_idx]; prach_ifft = prach_ifftp[prach_ifft_cnt++]; - + if (eNB->prach_vars_br.repetition_number[ce_level]==1) memset(prach_ifft,0,((N_ZC==839)?2048:256)*sizeof(int32_t)); } else { - Xu=(int16_t *)eNB->X_u[preamble_offset-first_nonzero_root_idx]; + Xu=(c16_t *)eNB->X_u[preamble_offset-first_nonzero_root_idx]; prach_ifft = prach_ifftp[0]; memset(prach_ifft,0,((N_ZC==839) ? 2048 : 256)*sizeof(int32_t)); } - - memset(prachF, 0, sizeof(int16_t)*2*1024 ); - - if (LOG_DUMPFLAG(PRACH)) { - if (prach[0]!= NULL) LOG_M("prach_rx0.m","prach_rx0",prach[0],6144+792,1,1); - - LOG_M("prach_rx1.m","prach_rx1",prach[1],6144+792,1,1); - LOG_M("prach_rxF0.m","prach_rxF0",rxsigF[0],12288,1,1); - LOG_M("prach_rxF1.m","prach_rxF1",rxsigF[1],12288,1,1); - } - - for (aa=0; aa<nb_rx; aa++) { + c16_t prachF[1024] __attribute__((aligned(32)))={0}; + + if (LOG_DUMPFLAG(PRACH)) + for (int z=0; z<nb_rx; z++) + if( prach[z] ) { + char tmp[128]; + sprintf(tmp,"prach_rx%d.m", z); + LOG_M(tmp,tmp,prach[z],6144+792,1,1); + sprintf(tmp,"prach_rxF%d.m", z); + LOG_M(tmp,tmp,rxsigF[z],12288,1,1); + } + + for (int aa=0; aa<nb_rx; aa++) { // Do componentwise product with Xu* on each antenna k=0; - - for (offset=0; offset<(N_ZC<<1); offset+=2) { - prachF[offset] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k] + (int32_t)Xu[offset+1]*rxsigF[aa][k+1])>>15); - prachF[offset+1] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k+1] - (int32_t)Xu[offset+1]*rxsigF[aa][k])>>15); + + for (int offset=0; offset<N_ZC; offset++) { + prachF[offset].r = (int16_t)(((int32_t)Xu[offset].r*rxsigF[aa][k] + (int32_t)Xu[offset].i*rxsigF[aa][k+1])>>15); + prachF[offset].i = (int16_t)(((int32_t)Xu[offset].r*rxsigF[aa][k+1] - (int32_t)Xu[offset].i*rxsigF[aa][k])>>15); k+=2; - + if (k==(12*2*fp->ofdm_symbol_size)) k=0; } @@ -600,13 +594,13 @@ void rx_prach0(PHY_VARS_eNB *eNB, // Now do IFFT of size 1024 (N_ZC=839) or 256 (N_ZC=139) if (N_ZC == 839) { log2_ifft_size = 10; - idft(IDFT_1024,prachF,prach_ifft_tmp,1); + idft(IDFT_1024,(int16_t*)prachF,prach_ifft_tmp,1); // compute energy and accumulate over receive antennas and repetitions for BR for (i=0; i<2048; i++) prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[i<<1] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>9; } else { - idft(IDFT_256,prachF,prach_ifft_tmp,1); + idft(IDFT_256,(int16_t*)prachF,prach_ifft_tmp,1); log2_ifft_size = 8; // compute energy and accumulate over receive antennas and repetitions for BR @@ -674,14 +668,12 @@ void rx_prach0(PHY_VARS_eNB *eNB, if (br_flag == 0) { LOG_M("rxsigF.m","prach_rxF",&rxsigF[0][0],12288,1,1); - LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1); LOG_M("Xu.m","xu",Xu,N_ZC,1,1); LOG_M("prach_ifft0.m","prach_t0",prach_ifft,1024,1,1); 
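The rewritten correlation loop in rx_prach0() computes prachF[k] = conj(Xu[k]) * rxsigF[k] in Q15 on c16_t values instead of interleaved int16_t pairs. A self-contained sketch of that fixed-point conjugate product (the local c16_t definition and the sample values are assumptions for illustration):

#include <stdint.h>
#include <stdio.h>

typedef struct { int16_t r, i; } c16_t;  /* assumed to mirror OAI's c16_t layout */

/* z = conj(x) * y, shifted by 15 to stay in Q15 */
static c16_t conj_mul_q15(c16_t x, c16_t y)
{
  c16_t z;
  z.r = (int16_t)(((int32_t)x.r * y.r + (int32_t)x.i * y.i) >> 15);
  z.i = (int16_t)(((int32_t)x.r * y.i - (int32_t)x.i * y.r) >> 15);
  return z;
}

int main(void)
{
  c16_t xu = { 23170, 23170 };  /* about (1+j)/sqrt(2) in Q15 */
  c16_t rx = { 16384, 0 };      /* 0.5 in Q15 */
  c16_t out = conj_mul_q15(xu, rx);
  printf("%d %+dj\n", out.r, out.i);
  return 0;
}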
LOG_M("SF2_3.m","sf2_3",&ru->common.rxdata[0][2*fp->samples_per_tti],2*fp->samples_per_tti,1,1); } else { LOG_E(PHY,"Dumping prach (br_flag %d), k = %d (n_ra_prb %d)\n",br_flag,k,n_ra_prb); LOG_M("rxsigF_br.m","prach_rxF_br",&rxsigF[0][0],12288,1,1); - LOG_M("prach_rxF_comp0_br.m","prach_rxF_comp0_br",prachF,1024,1,1); LOG_M("Xu_br.m","xu_br",Xu,N_ZC,1,1); LOG_M("prach_ifft0_br.m","prach_t0_br",prach_ifft,1024,1,1); } diff --git a/openair1/PHY/NR_ESTIMATION/nr_measurements_gNB.c b/openair1/PHY/NR_ESTIMATION/nr_measurements_gNB.c index c527c3fc9df083c115f095d1d655e1c35a81c98f..d8cf2fdc1b9f006a07c1faf0540bc0d76606f51b 100644 --- a/openair1/PHY/NR_ESTIMATION/nr_measurements_gNB.c +++ b/openair1/PHY/NR_ESTIMATION/nr_measurements_gNB.c @@ -67,7 +67,7 @@ int nr_est_timing_advance_pusch(PHY_VARS_gNB* gNB, int UE_id) return max_pos - sync_pos; } -int nr_est_timing_advance_srs(const NR_DL_FRAME_PARMS *frame_parms, +int nr_est_timing_advance_srs(const NR_DL_FRAME_PARMS *frame_parms, const int32_t srs_estimated_channel_time[][frame_parms->ofdm_symbol_size]) { int timing_advance = 0; int max_val = 0; diff --git a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c index 6093116ec903fe474af62405e6f9a8edb25987eb..1d6ff4603f8f5a0b9edec915011db31f4c13cfed 100644 --- a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c +++ b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c @@ -77,6 +77,29 @@ void freq2time(uint16_t ofdm_symbol_size, } } + +__attribute__((always_inline)) inline c16_t c32x16cumulVectVectWithSteps(c16_t *in1, + int *offset1, + const int step1, + c16_t *in2, + int *offset2, + const int step2, + const int modulo2, + const int N) { + + int localOffset1=*offset1; + int localOffset2=*offset2; + c32_t cumul={0}; + for (int i=0; i<N; i++) { + cumul=c32x16maddShift(in1[localOffset1], in2[localOffset2], cumul, 15); + localOffset1+=step1; + localOffset2= (localOffset2 + step2) % modulo2; + } + *offset1=localOffset1; + *offset2=localOffset2; + return c16x32div(cumul, N); +} + int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB, unsigned char Ns, unsigned short p, @@ -84,86 +107,72 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB, int ul_id, unsigned short bwp_start_subcarrier, nfapi_nr_pusch_pdu_t *pusch_pdu) { - - int pilot[3280] __attribute__((aligned(16))); - unsigned char aarx; - unsigned short k0; - unsigned int pilot_cnt,re_cnt; - int16_t ch[2],ch_r[2],ch_l[2],*pil,*rxF,*ul_ch; + c16_t pilot[3280] __attribute__((aligned(16))); int16_t *fl,*fm,*fr,*fml,*fmr,*fmm,*fdcl,*fdcr,*fdclh,*fdcrh; - int ch_offset,symbol_offset ; - int32_t **ul_ch_estimates_time = gNB->pusch_vars[ul_id]->ul_ch_estimates_time; - int chest_freq = gNB->chest_freq; - __m128i *ul_ch_128; + + const int chest_freq = gNB->chest_freq; #ifdef DEBUG_CH FILE *debug_ch_est; debug_ch_est = fopen("debug_ch_est.txt","w"); #endif - //uint16_t Nid_cell = (eNB_offset == 0) ? 
gNB->frame_parms.Nid_cell : gNB->measurements.adj_cell_id[eNB_offset-1]; - - uint8_t nushift; - int **ul_ch_estimates = gNB->pusch_vars[ul_id]->ul_ch_estimates; - int **rxdataF = gNB->common_vars.rxdataF; - int soffset = (Ns&3)*gNB->frame_parms.symbols_per_slot*gNB->frame_parms.ofdm_symbol_size; - nushift = (p>>1)&1; + c16_t **ul_ch_estimates = (c16_t **) gNB->pusch_vars[ul_id]->ul_ch_estimates; + const int symbolSize = gNB->frame_parms.ofdm_symbol_size; + const int soffset = (Ns&3)*gNB->frame_parms.symbols_per_slot*symbolSize; + const int nushift = (p>>1)&1; gNB->frame_parms.nushift = nushift; + int ch_offset = symbolSize*symbol; + const int symbol_offset = symbolSize*symbol; - ch_offset = gNB->frame_parms.ofdm_symbol_size*symbol; - - symbol_offset = gNB->frame_parms.ofdm_symbol_size*symbol; - - k0 = bwp_start_subcarrier; - int re_offset; - - uint16_t nb_rb_pusch = pusch_pdu->rb_size; + const int k0 = bwp_start_subcarrier; + const int nb_rb_pusch = pusch_pdu->rb_size; LOG_D(PHY, "In %s: ch_offset %d, soffset %d, symbol_offset %d, OFDM size %d, Ns = %d, k0 = %d, symbol %d\n", __FUNCTION__, ch_offset, soffset, symbol_offset, - gNB->frame_parms.ofdm_symbol_size, + symbolSize, Ns, k0, symbol); switch (nushift) { - case 0: - fl = filt8_l0; - fm = filt8_m0; - fr = filt8_r0; - fmm = filt8_mm0; - fml = filt8_m0; - fmr = filt8_mr0; - fdcl = filt8_dcl0; - fdcr = filt8_dcr0; - fdclh = filt8_dcl0_h; - fdcrh = filt8_dcr0_h; - break; - - case 1: - fl = filt8_l1; - fm = filt8_m1; - fr = filt8_r1; - fmm = filt8_mm1; - fml = filt8_ml1; - fmr = filt8_mm1; - fdcl = filt8_dcl1; - fdcr = filt8_dcr1; - fdclh = filt8_dcl1_h; - fdcrh = filt8_dcr1_h; - break; - - default: + case 0: + fl = filt8_l0; + fm = filt8_m0; + fr = filt8_r0; + fmm = filt8_mm0; + fml = filt8_m0; + fmr = filt8_mr0; + fdcl = filt8_dcl0; + fdcr = filt8_dcr0; + fdclh = filt8_dcl0_h; + fdcrh = filt8_dcr0_h; + break; + + case 1: + fl = filt8_l1; + fm = filt8_m1; + fr = filt8_r1; + fmm = filt8_mm1; + fml = filt8_ml1; + fmr = filt8_mm1; + fdcl = filt8_dcl1; + fdcr = filt8_dcr1; + fdclh = filt8_dcl1_h; + fdcrh = filt8_dcr1_h; + break; + + default: #ifdef DEBUG_CH if (debug_ch_est) fclose(debug_ch_est); #endif - return(-1); - break; - } + return(-1); + break; + } //------------------generate DMRS------------------// @@ -171,656 +180,402 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB, gNB->pusch_gold_init[pusch_pdu->scid] = pusch_pdu->ul_dmrs_scrambling_id; nr_gold_pusch(gNB, pusch_pdu->scid, pusch_pdu->ul_dmrs_scrambling_id); } + if (pusch_pdu->transform_precoding == transformPrecoder_disabled) { - nr_pusch_dmrs_rx(gNB, Ns, gNB->nr_gold_pusch_dmrs[pusch_pdu->scid][Ns][symbol], &pilot[0], (1000+p), 0, nb_rb_pusch, + nr_pusch_dmrs_rx(gNB, Ns, gNB->nr_gold_pusch_dmrs[pusch_pdu->scid][Ns][symbol], (int32_t *)pilot, (1000+p), 0, nb_rb_pusch, (pusch_pdu->bwp_start + pusch_pdu->rb_start)*NR_NB_SC_PER_RB, pusch_pdu->dmrs_config_type); - } - else { // if transform precoding or SC-FDMA is enabled in Uplink - + } else { // if transform precoding or SC-FDMA is enabled in Uplink // NR_SC_FDMA supports type1 DMRS so only 6 DMRS REs per RB possible - uint16_t index = get_index_for_dmrs_lowpapr_seq(nb_rb_pusch * (NR_NB_SC_PER_RB/2)); - uint8_t u = pusch_pdu->dfts_ofdm.low_papr_group_number; - uint8_t v = pusch_pdu->dfts_ofdm.low_papr_sequence_number; + const uint16_t index = get_index_for_dmrs_lowpapr_seq(nb_rb_pusch * (NR_NB_SC_PER_RB/2)); + const uint8_t u = pusch_pdu->dfts_ofdm.low_papr_group_number; + const uint8_t v = 
pusch_pdu->dfts_ofdm.low_papr_sequence_number; int16_t *dmrs_seq = gNB_dmrs_lowpaprtype1_sequence[u][v][index]; - AssertFatal(index >= 0, "Num RBs not configured according to 3GPP 38.211 section 6.3.1.4. For PUSCH with transform precoding, num RBs cannot be multiple of any other primenumber other than 2,3,5\n"); AssertFatal(dmrs_seq != NULL, "DMRS low PAPR seq not found, check if DMRS sequences are generated"); - LOG_D(PHY,"Transform Precoding params. u: %d, v: %d, index for dmrsseq: %d\n", u, v, index); - - nr_pusch_lowpaprtype1_dmrs_rx(gNB, Ns, dmrs_seq, &pilot[0], 1000, 0, nb_rb_pusch, 0, pusch_pdu->dmrs_config_type); - - #ifdef DEBUG_PUSCH - printf ("NR_UL_CHANNEL_EST: index %d, u %d,v %d\n", index, u, v); - LOG_M("gNb_DMRS_SEQ.m","gNb_DMRS_SEQ", dmrs_seq,6*nb_rb_pusch,1,1); - #endif - + nr_pusch_lowpaprtype1_dmrs_rx(gNB, Ns, dmrs_seq, (int32_t *)pilot, 1000, 0, nb_rb_pusch, 0, pusch_pdu->dmrs_config_type); +#ifdef DEBUG_PUSCH + printf ("NR_UL_CHANNEL_EST: index %d, u %d,v %d\n", index, u, v); + LOG_M("gNb_DMRS_SEQ.m","gNb_DMRS_SEQ", dmrs_seq,6*nb_rb_pusch,1,1); +#endif } //------------------------------------------------// - #ifdef DEBUG_PUSCH + for (int i = 0; i < (6 * nb_rb_pusch); i++) { - LOG_I(PHY, "In %s: %d + j*(%d)\n", - __FUNCTION__, - ((int16_t*)pilot)[2 * i], - ((int16_t*)pilot)[1 + (2 * i)]); + LOG_I(PHY, "In %s: %d + j*(%d)\n", __FUNCTION__, pilot[i].r,pilot[i].i); } + #endif + const uint8_t b_shift = pusch_pdu->nrOfLayers == 1; + + for (int aarx=0; aarx<gNB->frame_parms.nb_antennas_rx; aarx++) { + c16_t *rxdataF = (c16_t *)&gNB->common_vars.rxdataF[aarx][symbol_offset]; + c16_t *ul_ch = &ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; - uint8_t b_shift = pusch_pdu->nrOfLayers == 1; - - for (aarx=0; aarx<gNB->frame_parms.nb_antennas_rx; aarx++) { - - pil = (int16_t *)&pilot[0]; - rxF = (int16_t *)&rxdataF[aarx][(soffset + symbol_offset + k0 + nushift)]; - ul_ch = (int16_t *)&ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; - re_offset = k0; - - memset(ul_ch,0,4*(gNB->frame_parms.ofdm_symbol_size)); - + memset(ul_ch,0,sizeof(*ul_ch)*symbolSize); #ifdef DEBUG_PUSCH LOG_I(PHY, "In %s symbol_offset %d, nushift %d\n", __FUNCTION__, symbol_offset, nushift); - LOG_I(PHY, "In %s ch est pilot addr %p, N_RB_UL %d\n", __FUNCTION__, &pilot[0], gNB->frame_parms.N_RB_UL); + LOG_I(PHY, "In %s ch est pilot, N_RB_UL %d\n", __FUNCTION__, gNB->frame_parms.N_RB_UL); LOG_I(PHY, "In %s bwp_start_subcarrier %d, k0 %d, first_carrier %d, nb_rb_pusch %d\n", __FUNCTION__, bwp_start_subcarrier, k0, gNB->frame_parms.first_carrier_offset, nb_rb_pusch); - LOG_I(PHY, "In %s rxF addr %p p %d\n", __FUNCTION__, rxF, p); LOG_I(PHY, "In %s ul_ch addr %p nushift %d\n", __FUNCTION__, ul_ch, nushift); #endif - - if (pusch_pdu->dmrs_config_type == pusch_dmrs_type1 && chest_freq == 0){ + + if (pusch_pdu->dmrs_config_type == pusch_dmrs_type1 && chest_freq == 0) { + c16_t *pil = pilot; + int re_offset = k0; LOG_D(PHY,"PUSCH estimation DMRS type 1, Freq-domain interpolation"); - // For configuration type 1: k = 4*n + 2*k' + delta, // where k' is 0 or 1, and delta is in Table 6.4.1.1.3-1 from TS 38.211 - - pilot_cnt = 0; + int pilot_cnt = 0; int delta = nr_pusch_dmrs_delta(pusch_dmrs_type1, p); - + for (int n = 0; n < 3*nb_rb_pusch; n++) { - // LS estimation - ch[0] = 0; - ch[1] = 0; + c32_t ch = {0}; + for (int k_line = 0; k_line <= 1; k_line++) { - re_offset = (k0 + (n << 2) + (k_line << 1) + delta) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *) 
&rxdataF[aarx][(soffset + symbol_offset + re_offset)]; - ch[0] += (int16_t) (((int32_t) pil[0] * rxF[0] - (int32_t) pil[1] * rxF[1]) >> (15+b_shift)); - ch[1] += (int16_t) (((int32_t) pil[0] * rxF[1] + (int32_t) pil[1] * rxF[0]) >> (15+b_shift)); - pil += 2; + re_offset = (k0 + (n << 2) + (k_line << 1) + delta) % symbolSize; + ch=c32x16maddShift(*pil, + rxdataF[soffset + re_offset], + ch, + 15+b_shift); + pil++; } - + + c16_t ch16= {.r=(int16_t)ch.r, .i=(int16_t)ch.i}; + // Channel interpolation for (int k_line = 0; k_line <= 1; k_line++) { #ifdef DEBUG_PUSCH - re_offset = (k0 + (n << 2) + (k_line << 1)) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *) &rxdataF[aarx][(soffset + symbol_offset + re_offset)]; + re_offset = (k0 + (n << 2) + (k_line << 1)) % symbolSize; + c16_t *rxF = &rxdataF[soffset + re_offset]; printf("pilot %4u: pil -> (%6d,%6d), rxF -> (%4d,%4d), ch -> (%4d,%4d)\n", - pilot_cnt, pil[0], pil[1], rxF[0], rxF[1], ch[0], ch[1]); - //printf("data %4u: rxF -> (%4d,%4d) (%2d)\n",pilot_cnt, rxF[2], rxF[3], dBc(rxF[2], rxF[3])); + pilot_cnt, pil->r, pil->i, rxF->r, rxF->i, ch.r, ch.i); #endif + if (pilot_cnt == 0) { - multadd_real_vector_complex_scalar(fl, ch, ul_ch, 8); + c16multaddVectRealComplex(fl, &ch16, ul_ch, 8); } else if (pilot_cnt == 1) { - multadd_real_vector_complex_scalar(fml, ch, ul_ch, 8); + c16multaddVectRealComplex(fml, &ch16, ul_ch, 8); } else if (pilot_cnt == (6*nb_rb_pusch-2)) { - multadd_real_vector_complex_scalar(fmr, ch, ul_ch, 8); - ul_ch+=8; + c16multaddVectRealComplex(fmr, &ch16, ul_ch, 8); + ul_ch+=4; } else if (pilot_cnt == (6*nb_rb_pusch-1)) { - multadd_real_vector_complex_scalar(fr, ch, ul_ch, 8); + c16multaddVectRealComplex(fr, &ch16, ul_ch, 8); } else if (pilot_cnt%2 == 0) { - multadd_real_vector_complex_scalar(fmm, ch, ul_ch, 8); - ul_ch+=8; + c16multaddVectRealComplex(fmm, &ch16, ul_ch, 8); + ul_ch+=4; } else { - multadd_real_vector_complex_scalar(fm, ch, ul_ch, 8); + c16multaddVectRealComplex(fm, &ch16, ul_ch, 8); } + pilot_cnt++; } } // check if PRB crosses DC and improve estimates around DC - if ((bwp_start_subcarrier < gNB->frame_parms.ofdm_symbol_size) && (bwp_start_subcarrier+nb_rb_pusch*12 >= gNB->frame_parms.ofdm_symbol_size)) { - ul_ch = (int16_t *)&ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; - uint16_t idxDC = 2*(gNB->frame_parms.ofdm_symbol_size - bwp_start_subcarrier); - uint16_t idxPil = idxDC/2; + if ((bwp_start_subcarrier < symbolSize) && (bwp_start_subcarrier+nb_rb_pusch*12 >= symbolSize)) { + ul_ch = &ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; + const uint16_t idxDC = symbolSize - bwp_start_subcarrier; re_offset = k0; - pil = (int16_t *)&pilot[0]; - pil += (idxPil-2); - ul_ch += (idxDC-4); - ul_ch = memset(ul_ch, 0, sizeof(int16_t)*10); - re_offset = (re_offset+idxDC/2-2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][soffset+(symbol_offset+nushift+re_offset)]; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); - ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - - // for proper allignment of SIMD vectors + pil = pilot + idxDC / 2 - 1; + ul_ch += idxDC - 2 ; + ul_ch = memset(ul_ch, 0, sizeof(*ul_ch)*5); + re_offset = (re_offset+idxDC-1) % symbolSize; + const c16_t ch=c16mulShift(*pil, + rxdataF[soffset+nushift+re_offset], + 15); + + // for proper alignment of SIMD vectors if((gNB->frame_parms.N_RB_UL&1)==0) { - - multadd_real_vector_complex_scalar(fdcl, ch, ul_ch-4, 8); - - pil += 4; - re_offset = 
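In the type-1 DMRS estimator above, b_shift is 1 when nrOfLayers == 1, so the two LS products of a pilot pair are accumulated with a >>(15 + b_shift): the extra bit of shift is the divide-by-two that averages the pair. A toy sketch of that trick (the c16_t definition and the sample values are assumptions):

#include <stdint.h>
#include <stdio.h>

typedef struct { int16_t r, i; } c16_t;  /* assumed c16_t layout */

int main(void)
{
  const int b_shift = 1;  /* single layer: average the two pilots of a pair */
  c16_t pil[2] = { { 32767, 0 }, { 32767, 0 } };      /* ideal unit pilots */
  c16_t rx[2]  = { { 8000, 4000 }, { 8200, 3800 } };  /* received DMRS REs */
  int32_t ch_r = 0, ch_i = 0;
  for (int k = 0; k < 2; k++) {
    ch_r += ((int32_t)pil[k].r * rx[k].r + (int32_t)pil[k].i * rx[k].i) >> (15 + b_shift);
    ch_i += ((int32_t)pil[k].r * rx[k].i - (int32_t)pil[k].i * rx[k].r) >> (15 + b_shift);
  }
  printf("LS estimate: %d %+dj\n", (int)ch_r, (int)ch_i);
  return 0;
}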
(re_offset+4) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); - ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - - multadd_real_vector_complex_scalar(fdcr, ch, ul_ch-4, 8); - + c16multaddVectRealComplex(fdcl, &ch, ul_ch-2, 8); + pil += 2; + re_offset = (re_offset+4) % symbolSize; + const c16_t ch_tmp=c16mulShift(*pil, + rxdataF[nushift+re_offset], + 15); + c16multaddVectRealComplex(fdcr, &ch_tmp, ul_ch-2, 8); } else { - - multadd_real_vector_complex_scalar(fdclh, ch, ul_ch, 8); - - pil += 4; - re_offset = (re_offset+4) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][soffset+(symbol_offset+nushift+re_offset)]; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); - ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - - multadd_real_vector_complex_scalar(fdcrh, ch, ul_ch, 8); + c16multaddVectRealComplex(fdclh, &ch, ul_ch, 8); + pil += 2; + re_offset = (re_offset+4) % symbolSize; + const c16_t ch_tmp=c16mulShift(*pil, + rxdataF[soffset+nushift+re_offset], + 15); + c16multaddVectRealComplex(fdcrh, &ch_tmp, ul_ch, 8); } } #ifdef DEBUG_PUSCH - ul_ch = (int16_t *)&ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; + ul_ch = &ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; + for(uint16_t idxP=0; idxP<ceil((float)nb_rb_pusch*12/8); idxP++) { printf("(%3d)\t",idxP); - for(uint8_t idxI=0; idxI<16; idxI += 2) { - printf("%d\t%d\t",ul_ch[idxP*16+idxI],ul_ch[idxP*16+idxI+1]); + + for(uint8_t idxI=0; idxI<8; idxI++) { + printf("%d\t%d\t",ul_ch[idxP*8+idxI].r,ul_ch[idxP*8+idxI].i); } + printf("\n"); } -#endif - } - else if (pusch_pdu->dmrs_config_type == pusch_dmrs_type2 && chest_freq == 0) { //pusch_dmrs_type2 |p_r,p_l,d,d,d,d,p_r,p_l,d,d,d,d| + +#endif + } else if (pusch_pdu->dmrs_config_type == pusch_dmrs_type2 && chest_freq == 0) { //pusch_dmrs_type2 |p_r,p_l,d,d,d,d,p_r,p_l,d,d,d,d| LOG_D(PHY,"PUSCH estimation DMRS type 2, Freq-domain interpolation"); + c16_t *pil = pilot; + int re_offset = k0; // Treat first DMRS specially (left edge) - - rxF = (int16_t *)&rxdataF[aarx][soffset+(symbol_offset+nushift+re_offset)]; - - ul_ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); - ul_ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - - pil += 2; - ul_ch += 2; - re_offset = (re_offset + 1)%gNB->frame_parms.ofdm_symbol_size; + *ul_ch=c16mulShift(*pil, + rxdataF[soffset+nushift+re_offset], + 15); + pil++; + ul_ch++; + re_offset = (re_offset + 1)%symbolSize; + ch_offset++; + + // TO verify: used after the loop, likely a piece of code is missing for ch_r + c16_t ch_r; + for (int re_cnt = 1; re_cnt < (nb_rb_pusch*NR_NB_SC_PER_RB) - 5; re_cnt += 6) { + c16_t ch_l=c16mulShift(*pil, + rxdataF[soffset+nushift+re_offset], + 15); + *ul_ch = ch_l; + pil++; + ul_ch++; ch_offset++; - - for (re_cnt = 1; re_cnt < (nb_rb_pusch*NR_NB_SC_PER_RB) - 5; re_cnt += 6){ - - rxF = (int16_t *)&rxdataF[aarx][soffset+(symbol_offset+nushift+re_offset)]; - - ch_l[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); - ch_l[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - - ul_ch[0] = ch_l[0]; - ul_ch[1] = ch_l[1]; - - pil += 2; - ul_ch += 2; - ch_offset++; - - multadd_real_four_symbols_vector_complex_scalar(filt8_ml2, - ch_l, - ul_ch); - - re_offset = 
(re_offset+5)%gNB->frame_parms.ofdm_symbol_size; - - rxF = (int16_t *)&rxdataF[aarx][soffset+symbol_offset+nushift+re_offset]; - - ch_r[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); - ch_r[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - - - multadd_real_four_symbols_vector_complex_scalar(filt8_mr2, - ch_r, - ul_ch); - - //for (int re_idx = 0; re_idx < 8; re_idx += 2) - //printf("ul_ch = %d + j*%d\n", ul_ch[re_idx], ul_ch[re_idx+1]); - - ul_ch += 8; - ch_offset += 4; - - ul_ch[0] = ch_r[0]; - ul_ch[1] = ch_r[1]; - - pil += 2; - ul_ch += 2; - ch_offset++; - re_offset = (re_offset + 1)%gNB->frame_parms.ofdm_symbol_size; - - } - - // Treat last pilot specially (right edge) - - rxF = (int16_t *)&rxdataF[aarx][soffset+(symbol_offset+nushift+re_offset)]; - - ch_l[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); - ch_l[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - - ul_ch[0] = ch_l[0]; - ul_ch[1] = ch_l[1]; - - ul_ch += 2; - ch_offset++; - - multadd_real_four_symbols_vector_complex_scalar(filt8_rr1, - ch_l, - ul_ch); - - multadd_real_four_symbols_vector_complex_scalar(filt8_rr2, - ch_r, + multadd_real_four_symbols_vector_complex_scalar(filt8_ml2, + &ch_l, + ul_ch); + re_offset = (re_offset+5)%symbolSize; + ch_r=c16mulShift(*pil, + rxdataF[soffset+nushift+re_offset], + 15); + multadd_real_four_symbols_vector_complex_scalar(filt8_mr2, + &ch_r, ul_ch); - - ul_ch_128 = (__m128i *)&ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; - - ul_ch_128[0] = _mm_slli_epi16 (ul_ch_128[0], 2); - } - - else if (pusch_pdu->dmrs_config_type == pusch_dmrs_type1) {// this is case without frequency-domain linear interpolation, just take average of LS channel estimates of 6 DMRS REs and use a common value for the whole PRB - LOG_D(PHY,"PUSCH estimation DMRS type 1, no Freq-domain interpolation"); - int32_t ch_0, ch_1; + //for (int re_idx = 0; re_idx < 8; re_idx += 2) + //printf("ul_ch = %d + j*%d\n", ul_ch[re_idx], ul_ch[re_idx+1]); + ul_ch += 4; + ch_offset += 4; + *ul_ch = ch_r; + pil++; + ul_ch++; + ch_offset++; + re_offset = (re_offset + 1)%symbolSize; + } + + // Treat last pilot specially (right edge) + c16_t ch_l=c16mulShift(*pil, + rxdataF[soffset+nushift+re_offset], + 15); + *ul_ch = ch_l; + ul_ch++; + ch_offset++; + multadd_real_four_symbols_vector_complex_scalar(filt8_rr1, + &ch_l, + ul_ch); + multadd_real_four_symbols_vector_complex_scalar(filt8_rr2, + &ch_r, + ul_ch); + __m128i *ul_ch_128 = (__m128i *)&ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; + ul_ch_128[0] = _mm_slli_epi16 (ul_ch_128[0], 2); + } + + else if (pusch_pdu->dmrs_config_type == pusch_dmrs_type1) { // this is case without frequency-domain linear interpolation, just take average of LS channel estimates of 6 DMRS REs and use a common value for the whole PRB + LOG_D(PHY,"PUSCH estimation DMRS type 1, no Freq-domain interpolation\n"); + c16_t *rxF = &rxdataF[soffset + nushift]; + int pil_offset = 0; + int re_offset = k0; + c16_t ch; + // First PRB - ch_0 = ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 = ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % 
gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch[0] = ch_0 / 6; - ch[1] = ch_1 / 6; - - - + ch=c32x16cumulVectVectWithSteps(pilot, &pil_offset, 1, rxF, &re_offset, 2, symbolSize, 6); #if NO_INTERP - for (int i=0;i<12;i++) ((int32_t*)ul_ch)[i] = *(int32_t*)ch; - ul_ch+=24; + for (c16_t *end=ul_ch+12; ul_ch<end; ul_ch++) + *ul_ch=ch; #else - multadd_real_vector_complex_scalar(filt8_avlip0, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip1, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip2, - ch, - ul_ch, - 8); - ul_ch -= 24; + c16multaddVectRealComplex(filt8_avlip0, ch, ul_ch, 8); + ul_ch += 8; + c16multaddVectRealComplex(filt8_avlip1, ch, ul_ch, 8); + ul_ch += 8; + c16multaddVectRealComplex(filt8_avlip2, ch, ul_ch, 8); + ul_ch -= 12; #endif - for (pilot_cnt=6; pilot_cnt<6*(nb_rb_pusch-1); pilot_cnt += 6) { - - ch_0 = ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 = ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - 
ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch[0] = ch_0 / 6; - ch[1] = ch_1 / 6; + for (int pilot_cnt=6; pilot_cnt<6*(nb_rb_pusch-1); pilot_cnt += 6) { + ch=c32x16cumulVectVectWithSteps(pilot, &pil_offset, 1, rxF, &re_offset, 2, symbolSize, 6); #if NO_INTERP - for (int i=0;i<12;i++) ((int32_t*)ul_ch)[i] = *(int32_t*)ch; - ul_ch+=24; + for (c16_t *end=ul_ch+12; ul_ch<end; ul_ch++) + *ul_ch=ch; #else - ul_ch[6] += (ch[0] * 1365)>>15; // 1/12*16384 - ul_ch[7] += (ch[1] * 1365)>>15; // 1/12*16384 + ul_ch[3].r += (ch.r * 1365)>>15; // 1/12*16384 + ul_ch[3].i += (ch.i * 1365)>>15; // 1/12*16384 + + ul_ch += 4; + multadd_real_vector_complex_scalar(filt8_avlip3, ch, ul_ch, 8); ul_ch += 8; - multadd_real_vector_complex_scalar(filt8_avlip3, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip4, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip5, - ch, - ul_ch, - 8); - ul_ch -= 16; + multadd_real_vector_complex_scalar(filt8_avlip4, ch, ul_ch, 8); + + ul_ch += 8; + multadd_real_vector_complex_scalar(filt8_avlip5, ch, ul_ch, 8); + ul_ch -= 8; #endif } // Last PRB - ch_0 = ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 = ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+2) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch[0] = ch_0 / 6; - ch[1] = ch_1 / 6; - + ch=c32x16cumulVectVectWithSteps(pilot, &pil_offset, 1, rxF, &re_offset, 2, symbolSize, 6); + #if NO_INTERP - for (int i=0;i<12;i++) ((int32_t*)ul_ch)[i] = *(int32_t*)ch; - ul_ch+=24; + for (c16_t *end=ul_ch+12; ul_ch<end; ul_ch++) + *ul_ch=ch; #else - ul_ch[6] += (ch[0] * 1365)>>15; // 1/12*16384 - ul_ch[7] += (ch[1] * 1365)>>15; // 1/12*16384 - - ul_ch += 8; - multadd_real_vector_complex_scalar(filt8_avlip3, + ul_ch[3].r += (ch.r * 1365)>>15; // 1/12*16384 + ul_ch[3].i += (ch.i * 1365)>>15; // 1/12*16384 + + 
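The no-interpolation branches above replace six unrolled pilot accumulations with c32x16cumulVectVectWithSteps(): each conj(pilot)*rxF product is shifted to Q15, summed in a 32-bit complex accumulator, and divided by the number of REs at the end. A stand-alone sketch of that accumulate-then-divide pattern (the types and helper name below are local stand-ins, and the DMRS subcarrier stepping with modulo wrap is omitted):

#include <stdint.h>
#include <stdio.h>

typedef struct { int16_t r, i; } c16_t;
typedef struct { int32_t r, i; } c32_t;

/* Accumulate conj(a[k]) * b[k] over N REs in Q15, then divide by N. */
static c16_t average_ls(const c16_t *a, const c16_t *b, int N)
{
  c32_t acc = { 0, 0 };
  for (int k = 0; k < N; k++) {
    acc.r += ((int32_t)a[k].r * b[k].r + (int32_t)a[k].i * b[k].i) >> 15;
    acc.i += ((int32_t)a[k].r * b[k].i - (int32_t)a[k].i * b[k].r) >> 15;
  }
  return (c16_t){ (int16_t)(acc.r / N), (int16_t)(acc.i / N) };
}

int main(void)
{
  c16_t pil[6], rx[6];
  for (int k = 0; k < 6; k++) {
    pil[k] = (c16_t){ 32767, 0 };     /* ideal pilot */
    rx[k]  = (c16_t){ 9000, -3000 };  /* flat channel for the example */
  }
  c16_t ch = average_ls(pil, rx, 6);
  printf("per-PRB estimate: %d %+dj\n", ch.r, ch.i);
  return 0;
}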
ul_ch += 4; + c16multaddVectRealComplex(filt8_avlip3, ch, ul_ch, 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip6, + + ul_ch += 8; + c16multaddVectRealComplex(filt8_avlip6, ch, ul_ch, 8); #endif - } - else { // this is case without frequency-domain linear interpolation, just take average of LS channel estimates of 4 DMRS REs and use a common value for the whole PRB + } else { // this is case without frequency-domain linear interpolation, just take average of LS channel estimates of 4 DMRS REs and use a common value for the whole PRB LOG_D(PHY,"PUSCH estimation DMRS type 2, no Freq-domain interpolation"); - int32_t ch_0, ch_1; + c16_t *pil = pilot; + int re_offset = k0; + c32_t ch0={0}; //First PRB - ch_0 = ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 = ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+1) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+5) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+1) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+5) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch[0] = ch_0 / 4; - ch[1] = ch_1 / 4; - + ch0=c32x16mulShift(*pil, + rxdataF[soffset + nushift + re_offset], + 15); + pil++; + re_offset = (re_offset+1) % symbolSize; + ch0=c32x16maddShift(*pil, + rxdataF[nushift+re_offset], + ch0, + 15); + pil++; + re_offset = (re_offset+5) % symbolSize; + ch0=c32x16maddShift(*pil, + rxdataF[nushift+re_offset], + ch0, + 15); + re_offset = (re_offset+1) % symbolSize; + ch0=c32x16maddShift(*pil, + rxdataF[nushift+re_offset], + ch0, + 15); + pil++; + re_offset = (re_offset+5) % symbolSize; + + c16_t ch=c16x32div(ch0, 4); #if NO_INTERP - for (int i=0;i<12;i++) ((int32_t*)ul_ch)[i] = *(int32_t*)ch; - ul_ch+=24; + for (c16_t *end=ul_ch+12; ul_ch<end; ul_ch++) + *ul_ch=ch; #else - multadd_real_vector_complex_scalar(filt8_avlip0, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip1, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip2, - ch, - ul_ch, - 8); - ul_ch -= 24; + c16multaddVectRealComplex(filt8_avlip0, &ch, ul_ch, 8); + ul_ch += 8; + c16multaddVectRealComplex(filt8_avlip1, &ch, ul_ch, 8); + ul_ch += 8; + c16multaddVectRealComplex(filt8_avlip2, &ch, ul_ch, 8); + ul_ch -= 12; #endif - for (pilot_cnt=4; pilot_cnt<4*(nb_rb_pusch-1); pilot_cnt += 4) { - - ch_0 = ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 = ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+1) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; + for (int 
pilot_cnt=4; pilot_cnt<4*(nb_rb_pusch-1); pilot_cnt += 4) { + c32_t ch0; + ch0=c32x16mulShift(*pil, rxdataF[nushift+re_offset], 15); + pil++; + re_offset = (re_offset+1) % symbolSize; - pil += 2; - re_offset = (re_offset+5) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; + ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15); + pil++; + re_offset = (re_offset+5) % symbolSize; - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; + ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15); + pil++; + re_offset = (re_offset+1) % symbolSize; - pil += 2; - re_offset = (re_offset+1) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; + ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15); + pil++; + re_offset = (re_offset+5) % symbolSize; - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+5) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch[0] = ch_0 / 4; - ch[1] = ch_1 / 4; + ch=c16x32div(ch0, 4); #if NO_INTERP - for (int i=0;i<12;i++) ((int32_t*)ul_ch)[i] = *(int32_t*)ch; - ul_ch+=24; + for (c16_t *end=ul_ch+12; ul_ch<end; ul_ch++) + *ul_ch=ch; #else - ul_ch[6] += (ch[0] * 1365)>>15; // 1/12*16384 - ul_ch[7] += (ch[1] * 1365)>>15; // 1/12*16384 - + ul_ch[3]=c16maddShift(ch,(c16_t) {1365,1365},15); // 1365 = 1/12*16384 (full range is +/- 32768) + ul_ch += 4; + c16multaddVectRealComplex(filt8_avlip3, &ch, ul_ch, 8); ul_ch += 8; - multadd_real_vector_complex_scalar(filt8_avlip3, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip4, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip5, - ch, - ul_ch, - 8); - ul_ch -= 16; + c16multaddVectRealComplex(filt8_avlip4, &ch, ul_ch, 8); + ul_ch += 8; + c16multaddVectRealComplex(filt8_avlip5, &ch, ul_ch, 8); + ul_ch -= 8; #endif } - // Last PRB - ch_0 = ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 = ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+1) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+5) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+1) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch_0 += ((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15; - ch_1 += ((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15; - - pil += 2; - re_offset = (re_offset+5) % gNB->frame_parms.ofdm_symbol_size; - rxF = (int16_t *)&rxdataF[aarx][(symbol_offset+nushift+re_offset)]; - - ch[0] = ch_0 / 4; - ch[1] = ch_1 / 4; + // Last PRB + ch0=c32x16mulShift(*pil, rxdataF[nushift+re_offset], 15); + pil++; + re_offset = (re_offset+1) % symbolSize; + + ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15); + pil++; + re_offset = 
(re_offset+5) % symbolSize; + + ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15); + pil++; + re_offset = (re_offset+1) % symbolSize; + + ch0=c32x16maddShift(*pil, rxdataF[nushift+re_offset], ch0, 15); + pil++; + re_offset = (re_offset+5) % symbolSize; + + ch=c16x32div(ch0, 4); #if NO_INTERP - for (int i=0;i<12;i++) ((int32_t*)ul_ch)[i] = *(int32_t*)ch; - ul_ch+=24; + for (c16_t *end=ul_ch+12; ul_ch<end; ul_ch++) + *ul_ch=ch; #else - ul_ch[6] += (ch[0] * 1365)>>15; // 1/12*16384 - ul_ch[7] += (ch[1] * 1365)>>15; // 1/12*16384 - + ul_ch[3]=c16maddShift(ch, c16_t {1365,1365},15);// 1365 = 1/12*16384 (full range is +/- 32768) + ul_ch += 4; + c16multaddVectRealComplex(filt8_avlip3, &ch, ul_ch, 8); ul_ch += 8; - multadd_real_vector_complex_scalar(filt8_avlip3, - ch, - ul_ch, - 8); - - ul_ch += 16; - multadd_real_vector_complex_scalar(filt8_avlip6, - ch, - ul_ch, - 8); + c16multaddVectRealComplex(filt8_avlip6, &ch, ul_ch, 8); #endif } + #ifdef DEBUG_PUSCH - ul_ch = (int16_t *)&ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; - for(uint16_t idxP=0; idxP<ceil((float)nb_rb_pusch*12/8); idxP++) { - for(uint8_t idxI=0; idxI<16; idxI += 2) { - printf("%d\t%d\t",ul_ch[idxP*16+idxI],ul_ch[idxP*16+idxI+1]); + ul_ch = &ul_ch_estimates[p*gNB->frame_parms.nb_antennas_rx+aarx][ch_offset]; + + for(int idxP=0; idxP<ceil((float)nb_rb_pusch*12/8); idxP++) { + for(int idxI=0; idxI<8; idxI++) { + printf("%d\t%d\t",ul_ch[idxP*8+idxI].r,ul_ch[idxP*8+idxI].i); } + printf("%d\n",idxP); } -#endif +#endif // Convert to time domain - freq2time(gNB->frame_parms.ofdm_symbol_size, - (int16_t*) &ul_ch_estimates[aarx][symbol_offset], - (int16_t*) ul_ch_estimates_time[aarx]); + freq2time(symbolSize, + (int16_t *) &ul_ch_estimates[aarx][symbol_offset], + (int16_t *) gNB->pusch_vars[ul_id]->ul_ch_estimates_time[aarx]); } #ifdef DEBUG_CH fclose(debug_ch_est); #endif - return(0); } @@ -852,12 +607,10 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, uint8_t ulsch_id, uint8_t nr_tti_rx, unsigned char symbol, - uint32_t nb_re_pusch) -{ + uint32_t nb_re_pusch) { //#define DEBUG_UL_PTRS 1 int32_t *ptrs_re_symbol = NULL; int8_t ret = 0; - uint8_t symbInSlot = rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols; uint8_t *startSymbIndex = &rel15_ul->start_symbol_index; uint8_t *nbSymb = &rel15_ul->nr_of_symbols; @@ -869,6 +622,7 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, uint8_t *dmrsConfigType = &rel15_ul->dmrs_config_type; uint16_t *nb_rb = &rel15_ul->rb_size; uint8_t *ptrsReOffset = &rel15_ul->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset; + /* loop over antennas */ for (int aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { c16_t *phase_per_symbol = (c16_t*)gNB->pusch_vars[ulsch_id]->ptrs_phase_per_slot[aarx]; @@ -895,8 +649,10 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, 1<< *L_ptrs, *dmrsSymbPos); } + /* if not PTRS symbol set current ptrs symbol index to zero*/ *ptrsSymbIdx = 0; + /* Check if current symbol contains PTRS */ if(is_ptrs_symbol(symbol, *ptrsSymbPos)) { *ptrsSymbIdx = symbol; @@ -907,11 +663,12 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, rel15_ul->rnti, nr_tti_rx, symbol,frame_parms->ofdm_symbol_size, - (int16_t*)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(symbol * nb_re_pusch)], + (int16_t *)&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aarx][(symbol * nb_re_pusch)], gNB->nr_gold_pusch_dmrs[rel15_ul->scid][nr_tti_rx][symbol], (int16_t*)&phase_per_symbol[symbol], ptrs_re_symbol); } + /* For last OFDM symbol at each antenna perform interpolation and 
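calc_power() in nr_ul_channel_estimation.c returns the biased sample variance E[x^2] - (E[x])^2 with integer division. A short reference sketch of the same formula (accumulator widths and test data are chosen for the example, not taken from the patch):

#include <stdint.h>
#include <stdio.h>

static uint32_t calc_power_ref(const int16_t *x, uint32_t size)
{
  int64_t sum_x = 0, sum_x2 = 0;
  for (uint32_t k = 0; k < size; k++) {
    sum_x  += x[k];
    sum_x2 += (int32_t)x[k] * x[k];
  }
  return (uint32_t)(sum_x2 / size - (sum_x / size) * (sum_x / size));
}

int main(void)
{
  int16_t x[4] = { 100, -100, 200, -200 };
  printf("power = %u\n", (unsigned)calc_power_ref(x, 4));  /* zero mean, so this is E[x^2] = 25000 */
  return 0;
}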
compensation for the slot*/ if(symbol == (symbInSlot -1)) { /*------------------------------------------------------------------------------------------------------- */ @@ -924,16 +681,18 @@ void nr_pusch_ptrs_processing(PHY_VARS_gNB *gNB, LOG_W(PHY,"[PTRS] Compensation is skipped due to error in PTRS slot processing !!\n"); } } + #ifdef DEBUG_UL_PTRS LOG_M("ptrsEstUl.m","est",gNB->pusch_vars[ulsch_id]->ptrs_phase_per_slot[aarx],frame_parms->symbols_per_slot,1,1 ); LOG_M("rxdataF_bf_ptrs_comp_ul.m","bf_ptrs_cmp", &gNB->pusch_vars[0]->rxdataF_comp[aarx][rel15_ul->start_symbol_index * NR_NB_SC_PER_RB * rel15_ul->rb_size], rel15_ul->nr_of_symbols * NR_NB_SC_PER_RB * rel15_ul->rb_size,1,1); #endif + /*------------------------------------------------------------------------------------------------------- */ /* 3) Compensated DMRS based estimated signal with PTRS estimation */ /*--------------------------------------------------------------------------------------------------------*/ - for(uint8_t i = *startSymbIndex; i< symbInSlot ;i++) { + for(uint8_t i = *startSymbIndex; i< symbInSlot ; i++) { /* DMRS Symbol has 0 phase so no need to rotate the respective symbol */ /* Skip rotation if the slot processing is wrong */ if((!is_dmrs_symbol(i,*dmrsSymbPos)) && (ret == 0)) { @@ -957,6 +716,7 @@ uint32_t calc_power(const int16_t *x, const uint32_t size) { sum_x = sum_x + x[k]; sum_x2 = sum_x2 + x[k]*x[k]; } + return sum_x2/size - (sum_x/size)*(sum_x/size); } @@ -1252,7 +1012,7 @@ int nr_srs_channel_estimation(const PHY_VARS_gNB *gNB, *noise_power = max(calc_power(noise_real,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS) + calc_power(noise_imag,frame_parms->nb_antennas_rx*N_ap*M_sc_b_SRS), 1); - *snr = dB_fixed((int32_t)((*signal_power<<factor_bits)/(*noise_power))) - factor_dB; + *snr = dB_fixed((int32_t)((*signal_power<<factor_bits)/(*noise_power))) - factor_dB; #ifdef SRS_DEBUG LOG_I(NR_PHY,"noise_power = %u, SNR = %i dB\n", *noise_power, *snr); diff --git a/openair1/PHY/NR_TRANSPORT/srs_rx.c b/openair1/PHY/NR_TRANSPORT/srs_rx.c index 54264b70e0a07599df7646da16d6a015fa371d41..ba95b9ac4499bb69610ef26c2a76b946f8110df6 100644 --- a/openair1/PHY/NR_TRANSPORT/srs_rx.c +++ b/openair1/PHY/NR_TRANSPORT/srs_rx.c @@ -183,4 +183,4 @@ int nr_get_srs_signal(PHY_VARS_gNB *gNB, } else { return 0; } -} \ No newline at end of file +} diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c index 6728f0260c093cdaacee71d757bbcee9c5c0e5d7..88cc419dc166a555752c819cae73512c94e098bb 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c +++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c @@ -480,45 +480,45 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue, nbSymb = dlsch1_harq->nb_symbols; pduBitmap = dlsch1_harq->pduBitmap; } - + /* Check for PTRS bitmap and process it respectively */ if((pduBitmap & 0x1) && (type == PDSCH)) { nr_pdsch_ptrs_processing(ue, - pdsch_vars, - frame_parms, - dlsch0_harq, - dlsch1_harq, - gNB_id, - nr_slot_rx, - symbol, - (nb_rb_pdsch*12), - dlsch[0]->rnti,rx_type); + pdsch_vars, + frame_parms, + dlsch0_harq, + dlsch1_harq, + gNB_id, + nr_slot_rx, + symbol, + (nb_rb_pdsch*12), + dlsch[0]->rnti,rx_type); pdsch_vars[gNB_id]->dl_valid_re[symbol-1] -= pdsch_vars[gNB_id]->ptrs_re_per_slot[0][symbol]; } - + /* at last symbol in a slot calculate LLR's for whole slot */ if(symbol == (startSymbIdx + nbSymb -1)) { for(uint8_t i =startSymbIdx; i < (startSymbIdx+nbSymb);i++) { /* re evaluating the first symbol flag as LLR's are done in 
symbol loop */ if(i == startSymbIdx && i < 3) { - first_symbol_flag =1; + first_symbol_flag =1; } else { - first_symbol_flag=0; + first_symbol_flag=0; } /* Calculate LLR's for each symbol */ nr_dlsch_llr(pdsch_vars, frame_parms, - rxdataF_comp_ptr, dl_ch_mag_ptr, - dlsch0_harq, dlsch1_harq, - rx_type, harq_pid, - gNB_id, gNB_id_i, - first_symbol_flag, - i, nb_rb_pdsch, - codeword_TB0, codeword_TB1, - pdsch_vars[gNB_id]->dl_valid_re[i-1], - nr_slot_rx, beamforming_mode); + rxdataF_comp_ptr, dl_ch_mag_ptr, + dlsch0_harq, dlsch1_harq, + rx_type, harq_pid, + gNB_id, gNB_id_i, + first_symbol_flag, + i, nb_rb_pdsch, + codeword_TB0, codeword_TB1, + pdsch_vars[gNB_id]->dl_valid_re[i-1], + nr_slot_rx, beamforming_mode); } - + int dmrs_type = dlsch[0]->harq_processes[harq_pid]->dmrsConfigType; uint8_t nb_re_dmrs; uint16_t dmrs_len = get_num_dmrs(dlsch[0]->harq_processes[harq_pid]->dlDmrsSymbPos); @@ -528,29 +528,29 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue, nb_re_dmrs = 4*dlsch[0]->harq_processes[harq_pid]->n_dmrs_cdm_groups; } dlsch[0]->harq_processes[harq_pid]->G = nr_get_G(dlsch[0]->harq_processes[harq_pid]->nb_rb, - dlsch[0]->harq_processes[harq_pid]->nb_symbols, - nb_re_dmrs, - dmrs_len, - dlsch[0]->harq_processes[harq_pid]->Qm, - dlsch[0]->harq_processes[harq_pid]->Nl); + dlsch[0]->harq_processes[harq_pid]->nb_symbols, + nb_re_dmrs, + dmrs_len, + dlsch[0]->harq_processes[harq_pid]->Qm, + dlsch[0]->harq_processes[harq_pid]->Nl); nr_dlsch_layer_demapping(pdsch_vars[gNB_id]->llr, - dlsch[0]->harq_processes[harq_pid]->Nl, - dlsch[0]->harq_processes[harq_pid]->Qm, - dlsch[0]->harq_processes[harq_pid]->G, - codeword_TB0, - codeword_TB1, - pdsch_vars[gNB_id]->layer_llr); + dlsch[0]->harq_processes[harq_pid]->Nl, + dlsch[0]->harq_processes[harq_pid]->Qm, + dlsch[0]->harq_processes[harq_pid]->G, + codeword_TB0, + codeword_TB1, + pdsch_vars[gNB_id]->layer_llr); } - + stop_meas(&ue->generic_stat_bis[proc->thread_id][slot]); if (cpumeas(CPUMEAS_GETSTATE)) LOG_D(PHY, "[AbsSFN %u.%d] Slot%d Symbol %d: LLR Computation %5.2f \n",frame,nr_slot_rx,slot,symbol,ue->generic_stat_bis[proc->thread_id][slot].p_time/(cpuf*1000.0)); - + // Please keep it: useful for debugging #ifdef DEBUG_PDSCH_RX char filename[50]; uint8_t aa = 0; - + snprintf(filename, 50, "rxdataF0_symb_%d_nr_slot_rx_%d.m", symbol, nr_slot_rx); write_output(filename, "rxdataF0", &common_vars->common_vars_rx_data_per_thread[proc->thread_id].rxdataF[0][0], NR_SYMBOLS_PER_SLOT*frame_parms->ofdm_symbol_size, 1, 1); diff --git a/openair1/PHY/TOOLS/8bit_rxdemux.c b/openair1/PHY/TOOLS/8bit_rxdemux.c deleted file mode 100644 index 030be47b5daf1a04b524de31f36eb93f314a8e4d..0000000000000000000000000000000000000000 --- a/openair1/PHY/TOOLS/8bit_rxdemux.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The OpenAirInterface Software Alliance licenses this file to You under - * the OAI Public License, Version 1.1 (the "License"); you may not use this file - * except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.openairinterface.org/?page_id=698 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------------------- - * For more information about the OpenAirInterface (OAI) Software Alliance: - * contact@openairinterface.org - */ - -#include "PHY/types.h" -#include "PHY/defs.h" -#include "PHY/extern.h" - - -void bit8_rxdemux(int length,int offset) -{ - - int i; - short *rx1_ptr = (short *)&PHY_vars->rx_vars[1].RX_DMA_BUFFER[offset]; - short *rx0_ptr = (short *)&PHY_vars->rx_vars[0].RX_DMA_BUFFER[offset]; - char *rx0_ptr2 = (char *)(&PHY_vars->rx_vars[0].RX_DMA_BUFFER[offset]); - // short tmp; - short r0,i0,r1,i1; - - // printf("demuxing: %d,%d\n",length,offset); - - // printf("%x %x\n",PHY_vars->chsch_data[0].CHSCH_f_sync[0], PHY_vars->chsch_data[0].CHSCH_f_sync[1]); - - for (i=0; i<length; i++) { - - - - - r0= (short)(rx0_ptr2[i<<2]); // Re 0 - - i0= (short)(rx0_ptr2[(i<<2)+1]); // Im 0 - - r1= (short)(rx0_ptr2[(i<<2)+2]); // Re 1 - - i1= (short)(rx0_ptr2[(i<<2)+3]); // Im 1 - - rx0_ptr[(i<<1)] = r0; - rx0_ptr[(i<<1)+1] =i0; - rx1_ptr[i<<1] =r1; - rx1_ptr[(i<<1)+1] =i1; - } - - // printf("%x %x\n",PHY_vars->chsch_data[0].CHSCH_f_sync[0], PHY_vars->chsch_data[0].CHSCH_f_sync[1]); - -} diff --git a/openair1/PHY/TOOLS/8bit_txmux.c b/openair1/PHY/TOOLS/8bit_txmux.c deleted file mode 100644 index b103bc9c5cd9b49a4d0df7411da79410c518250c..0000000000000000000000000000000000000000 --- a/openair1/PHY/TOOLS/8bit_txmux.c +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The OpenAirInterface Software Alliance licenses this file to You under - * the OAI Public License, Version 1.1 (the "License"); you may not use this file - * except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.openairinterface.org/?page_id=698 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------------------- - * For more information about the OpenAirInterface (OAI) Software Alliance: - * contact@openairinterface.org - */ - -#include "PHY/types.h" -#include "PHY/defs.h" -#include "PHY/extern.h" - - -void bit8_txmux(int length,int offset) -{ - - int i; - short *dest,*dest2; - - - - - for (i=0; i<length; i++) { - - dest = (short *)&PHY_vars->tx_vars[0].TX_DMA_BUFFER[i+offset]; - dest2 = (short *)&PHY_vars->tx_vars[1].TX_DMA_BUFFER[i+offset]; - - ((char *)dest)[0] = (char)(dest[0]>>BIT8_TX_SHIFT); - ((char *)dest)[1] = (char)(dest[1]>>BIT8_TX_SHIFT); - ((char *)dest)[2] = (char)(dest2[0]>>BIT8_TX_SHIFT); - ((char *)dest)[3] = (char)(dest2[1]>>BIT8_TX_SHIFT); - } - - -} diff --git a/openair1/PHY/TOOLS/cadd_sv.c b/openair1/PHY/TOOLS/cadd_sv.c deleted file mode 100644 index 56dcb718478c25ad843bee62eabfa535e6c11d72..0000000000000000000000000000000000000000 --- a/openair1/PHY/TOOLS/cadd_sv.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The OpenAirInterface Software Alliance licenses this file to You under - * the OAI Public License, Version 1.1 (the "License"); you may not use this file - * except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.openairinterface.org/?page_id=698 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *------------------------------------------------------------------------------- - * For more information about the OpenAirInterface (OAI) Software Alliance: - * contact@openairinterface.org - */ - -#include "defs.h" - -static __m128i alpha_128 __attribute__ ((aligned(16))); -static __m128i shift __attribute__ ((aligned(16))); - -int add_cpx_vector(short *x, - short *alpha, - short *y, - unsigned int N) -{ - unsigned int i; // loop counter - - __m128i *x_128; - __m128i *y_128; - - x_128 = (__m128i *)&x[0]; - y_128 = (__m128i *)&y[0]; - - alpha_128 = _mm_set1_epi32(*((int*)alpha)); - - // we compute 4 cpx multiply for each loop - for(i=0; i<(N>>3); i++) { - y_128[0] = _mm_adds_epi16(alpha_128, x_128[0]); - y_128[1] = _mm_adds_epi16(alpha_128, x_128[1]); - y_128[2] = _mm_adds_epi16(alpha_128, x_128[2]); - y_128[3] = _mm_adds_epi16(alpha_128, x_128[3]); - - - x_128+=4; - y_128 +=4; - - } - - return (0); -} - -int add_vector32_scalar(short *x, - int alpha, - short *y, - unsigned int N) -{ - unsigned int i; // loop counter - - __m128i *x_128; - __m128i *y_128; - - x_128 = (__m128i *)&x[0]; - y_128 = (__m128i *)&y[0]; - - alpha_128 = _mm_setr_epi32(alpha,0,alpha,0); - - // we compute 4 cpx multiply for each loop - for(i=0; i<(N>>3); i++) { - y_128[0] = _mm_add_epi32(alpha_128, x_128[0]); - y_128[1] = _mm_add_epi32(alpha_128, x_128[1]); - y_128[2] = _mm_add_epi32(alpha_128, x_128[2]); - y_128[3] = _mm_add_epi32(alpha_128, x_128[3]); - - - x_128+=4; - y_128 +=4; - - } - - return (0); -} - - -int add_real_vector64_scalar(short *x, - long long int a, - short *y, - unsigned int N) -{ - unsigned int i; // loop counter - - __m128i *x_128; - __m128i *y_128; - - x_128 = (__m128i *)&x[0]; - y_128 = (__m128i *)&y[0]; - - alpha_128 = _mm_set1_epi64((__m64) a); - - // we compute 4 cpx multiply for each loop - for(i=0; i<(N>>3); i++) { - y_128[0] = _mm_add_epi64(alpha_128, x_128[0]); - y_128[1] = _mm_add_epi64(alpha_128, x_128[1]); - y_128[2] = _mm_add_epi64(alpha_128, x_128[2]); - y_128[3] = _mm_add_epi64(alpha_128, x_128[3]); - - - x_128+=4; - y_128+=4; - - } - - return(0); -} - - -#ifdef MAIN -#include <stdio.h> - -main () -{ - - short input[256] __attribute__((aligned(16))); - short output[256] __attribute__((aligned(16))); - - int i; - c16_t alpha; - - Zero_Buffer(output,256*2); - - input[0] = 100; - input[1] = 200; - input[2] = 100; - input[3] = 200; - input[4] = 1234; - input[5] = -1234; - input[6] = 1234; - input[7] = -1234; - input[8] = 100; - input[9] = 200; - input[10] = 100; - input[11] = 200; - input[12] = 1000; - input[13] = 2000; - input[14] = 1000; - input[15] = 2000; - - alpha.r = 10; - alpha.i = -10; - - add_cpx_vector(input,(short*) &alpha,input,8); - - for (i=0; i<16; i+=2) - printf("output[%d] = %d + %d i\n",i,input[i],input[i+1]); - -} - -#endif //MAIN diff --git 
a/openair1/PHY/TOOLS/cadd_vv.c b/openair1/PHY/TOOLS/cadd_vv.c index 08bf514cb357b2f032b3922810b0440f2d29cfb0..88a2bd98140a71b6c73a3273d01e6c980ab53dd1 100644 --- a/openair1/PHY/TOOLS/cadd_vv.c +++ b/openair1/PHY/TOOLS/cadd_vv.c @@ -23,153 +23,6 @@ #include "tools_defs.h" -int add_vector16(short *x, - short *y, - short *z, - unsigned int N) -{ - unsigned int i; // loop counter - - __m128i *x_128; - __m128i *y_128; - __m128i *z_128; - - x_128 = (__m128i *)&x[0]; - y_128 = (__m128i *)&y[0]; - z_128 = (__m128i *)&z[0]; - - - for(i=0; i<(N>>5); i++) { - /* - printf("i = %d\n",i); - print_shorts(x_128[0],"x[0]="); - print_shorts(y_128[0],"y[0]="); - */ - - z_128[0] = _mm_adds_epi16(x_128[0],y_128[0]); - /* - print_shorts(z_128[0],"z[0]="); - - print_shorts(x_128[1],"x[1]="); - print_shorts(y_128[1],"y[1]="); - */ - - z_128[1] = _mm_adds_epi16(x_128[1],y_128[1]); - /* - print_shorts(z_128[1],"z[1]="); - - print_shorts(x_128[2],"x[2]="); - print_shorts(y_128[2],"y[2]="); - */ - - z_128[2] = _mm_adds_epi16(x_128[2],y_128[2]); - /* - print_shorts(z_128[2],"z[2]="); - - print_shorts(x_128[3],"x[3]="); - print_shorts(y_128[3],"y[3]="); - */ - - z_128[3] = _mm_adds_epi16(x_128[3],y_128[3]); - /* - print_shorts(z_128[3],"z[3]="); - */ - - - - - - x_128 +=4; - y_128 +=4; - z_128 +=4; - - } - - _mm_empty(); - _m_empty(); - - return(0); -} - -/* -print_shorts64(__m64 x,char *s) { - - printf("%s ",s); - printf("%d %d %d %d\n",((short *)&x)[0],((short *)&x)[1],((short *)&x)[2],((short *)&x)[3]); - -} -*/ - -int add_vector16_64(short *x, - short *y, - short *z, - unsigned int N) -{ - unsigned int i; // loop counter - - __m64 *x_64; - __m64 *y_64; - __m64 *z_64; - - x_64 = (__m64 *)&x[0]; - y_64 = (__m64 *)&y[0]; - z_64 = (__m64 *)&z[0]; - - - for(i=0; i<(N>>2); i++) { - /* - printf("i = %d\n",i); - print_shorts64(x_64[i],"x[i]="); - print_shorts64(y_64[i],"y[i]="); - */ - - z_64[i] = _mm_adds_pi16(x_64[i],y_64[i]); - /* - print_shorts64(z_64[i],"z[i]="); - */ - - - } - - _mm_empty(); - _m_empty(); - return(0); -} - -int add_cpx_vector32(short *x, - short *y, - short *z, - unsigned int N) -{ - unsigned int i; // loop counter - - __m128i *x_128; - __m128i *y_128; - __m128i *z_128; - - x_128 = (__m128i *)&x[0]; - y_128 = (__m128i *)&y[0]; - z_128 = (__m128i *)&z[0]; - - - for(i=0; i<(N>>3); i++) { - z_128[0] = _mm_add_epi32(x_128[0],y_128[0]); - z_128[1] = _mm_add_epi32(x_128[1],y_128[1]); - z_128[2] = _mm_add_epi32(x_128[2],y_128[2]); - z_128[3] = _mm_add_epi32(x_128[3],y_128[3]); - - - x_128+=4; - y_128 +=4; - z_128 +=4; - - } - - _mm_empty(); - _m_empty(); - return(0); -} - int32_t sub_cpx_vector16(int16_t *x, int16_t *y, int16_t *z, @@ -201,73 +54,6 @@ int32_t sub_cpx_vector16(int16_t *x, -int add_real_vector64(short *x, - short* y, - short *z, - unsigned int N) -{ - unsigned int i; // loop counter - - __m128i *x_128; - __m128i *y_128; - __m128i *z_128; - - x_128 = (__m128i *)&x[0]; - y_128 = (__m128i *)&y[0]; - z_128 = (__m128i *)&z[0]; - - for(i=0; i<(N>>3); i++) { - z_128[0] = _mm_add_epi64(x_128[0], y_128[0]); - z_128[1] = _mm_add_epi64(x_128[1], y_128[1]); - z_128[2] = _mm_add_epi64(x_128[2], y_128[2]); - z_128[3] = _mm_add_epi64(x_128[3], y_128[3]); - - - x_128+=4; - y_128+=4; - z_128+=4; - - } - - _mm_empty(); - _m_empty(); - return(0); -} - -int sub_real_vector64(short *x, - short* y, - short *z, - unsigned int N) -{ - unsigned int i; // loop counter - - __m128i *x_128; - __m128i *y_128; - __m128i *z_128; - - x_128 = (__m128i *)&x[0]; - y_128 = (__m128i *)&y[0]; - z_128 = (__m128i *)&z[0]; - - 
for(i=0; i<(N>>3); i++) { - z_128[0] = _mm_sub_epi64(x_128[0], y_128[0]); - z_128[1] = _mm_sub_epi64(x_128[1], y_128[1]); - z_128[2] = _mm_sub_epi64(x_128[2], y_128[2]); - z_128[3] = _mm_sub_epi64(x_128[3], y_128[3]); - - - x_128+=4; - y_128+=4; - z_128+=4; - - } - - _mm_empty(); - _m_empty(); - return(0); -} - - #ifdef MAIN #include <stdio.h> @@ -280,7 +66,7 @@ main () int i; c16_t alpha; - Zero_Buffer(output,256*2); + memset(output,0,256*2); input[0] = 100; input[1] = 200; diff --git a/openair1/PHY/TOOLS/cmult_sv.c b/openair1/PHY/TOOLS/cmult_sv.c index fb2f4d589e5e43d87d7fa8ab0bd4410adb88bf1c..a596446a44ab2609afdda1ca05db823347892014 100644 --- a/openair1/PHY/TOOLS/cmult_sv.c +++ b/openair1/PHY/TOOLS/cmult_sv.c @@ -22,28 +22,6 @@ #include "PHY/sse_intrin.h" #include "tools_defs.h" -#if defined(__x86_64__) || defined(__i386__) -#define simd_q15_t __m128i -#define simdshort_q15_t __m64 -#define shiftright_int16(a,shift) _mm_srai_epi16(a,shift) -#define set1_int16(a) _mm_set1_epi16(a) -#define mulhi_int16(a,b) _mm_slli_epi16(_mm_mulhi_epi16(a,b),1) -#define mulhi_s1_int16(a,b) _mm_slli_epi16(_mm_mulhi_epi16(a,b),2) -#define adds_int16(a,b) _mm_adds_epi16(a,b) -#define mullo_int16(a,b) _mm_mullo_epi16(a,b) -#elif defined(__arm__) -#define simd_q15_t int16x8_t -#define simdshort_q15_t int16x4_t -#define shiftright_int16(a,shift) vshrq_n_s16(a,shift) -#define set1_int16(a) vdupq_n_s16(a) -#define mulhi_int16(a,b) vqdmulhq_s16(a,b) -#define mulhi_s1_int16(a,b) vshlq_n_s16(vqdmulhq_s16(a,b),1) -#define adds_int16(a,b) vqaddq_s16(a,b) -#define mullo_int16(a,b) vmulq_s16(a,b) -#define _mm_empty() -#define _m_empty() -#endif - void multadd_complex_vector_real_scalar(int16_t *x, int16_t alpha, @@ -111,217 +89,138 @@ void multadd_real_vector_complex_scalar(int16_t *x, } } -void multadd_real_four_symbols_vector_complex_scalar(int16_t *x, - int16_t *alpha, - int16_t *y) -{ - - // do 8 multiplications at a time - simd_q15_t alpha_r_128,alpha_i_128,yr,yi,*x_128=(simd_q15_t*)x; - simd_q15_t y_128; - y_128 = _mm_loadu_si128((simd_q15_t*)y); - - alpha_r_128 = set1_int16(alpha[0]); - alpha_i_128 = set1_int16(alpha[1]); - - - yr = mulhi_s1_int16(alpha_r_128,x_128[0]); - yi = mulhi_s1_int16(alpha_i_128,x_128[0]); - y_128 = _mm_adds_epi16(y_128,_mm_unpacklo_epi16(yr,yi)); - y_128 = _mm_adds_epi16(y_128,_mm_unpackhi_epi16(yr,yi)); - - _mm_storeu_si128((simd_q15_t*)y, y_128); - - _mm_empty(); - _m_empty(); -} -#ifdef __AVX2__ void rotate_cpx_vector(c16_t *x, - c16_t *alpha, - c16_t *y, - uint32_t N, - uint16_t output_shift) + c16_t *alpha, + c16_t *y, + uint32_t N, + uint16_t output_shift) { - // Multiply elementwise two complex vectors of N elements - // x - input 1 in the format |Re0 Im0 |,......,|Re(N-1) Im(N-1)| - // We assume x1 with a dynamic of 15 bit maximum - // - // alpha - input 2 in the format |Re0 Im0| - // We assume x2 with a dynamic of 15 bit maximum - // - // y - output in the format |Re0 Im0|,......,|Re(N-1) Im(N-1)| - // - // N - the size f the vectors (this function does N cpx mpy. WARNING: N>=4; - // - // log2_amp - increase the output amplitude by a factor 2^log2_amp (default is 0) - // WARNING: log2_amp>0 can cause overflow!! 
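/* For reference (not part of the patch): the rewritten rotate_cpx_vector below is, up to the
 * saturation and rounding details of the SIMD paths, equivalent to this scalar sketch. It
 * assumes the c16_t type and the c16mulShift() helper that this same patch adds to
 * tools_defs.h (already included by cmult_sv.c); the helper name rotate_cpx_vector_ref is
 * made up for illustration. */
static void rotate_cpx_vector_ref(const c16_t *x, const c16_t *alpha, c16_t *y,
                                  uint32_t N, uint16_t output_shift)
{
  for (uint32_t i = 0; i < N; i++)
    // complex-multiply each sample by alpha, then scale the result down by output_shift bits
    y[i] = c16mulShift(x[i], *alpha, output_shift);
}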
-
-  uint32_t i;                 // loop counter
-
-
-  simd_q15_t *y_128,alpha_128;
-  int32_t *xd=(int32_t *)x;
+  // multiply a complex vector with a complex value (alpha)
+  // stores result in y
+  // N is the number of complex numbers
+  // output_shift reduces the result of the multiplication by this number of bits
+  //AssertFatal(N%8==0, "To be developed");
+#ifdef __AVX2__
+  if ((intptr_t)x % 32 == 0 && (intptr_t)y % 32 == 0 && __builtin_cpu_supports("avx2")) {
+    // fast path: both input and output must be 32-byte aligned, because the loop below
+    // dereferences __m256i pointers directly (aligned loads/stores)
+
+    const c16_t for_re={alpha->r, -alpha->i};
+    __m256i const alpha_for_real = _mm256_set1_epi32(*(uint32_t*)&for_re);
+    const c16_t for_im={alpha->i, alpha->r};
+    __m256i const alpha_for_im= _mm256_set1_epi32(*(uint32_t*)&for_im);
+    __m256i const perm_mask =
+      _mm256_set_epi8(31,30,23,22,29,28,21,20,27,26,19,18,25,24,17,16,
+                      15,14,7,6,13,12,5,4,11,10,3,2,9,8,1,0);
+    __m256i* xd= (__m256i*)x;
+    const __m256i *end=xd+N/8;
+    for( __m256i* yd = (__m256i *)y; xd<end ; yd++, xd++) {
+      const __m256i xre = _mm256_srai_epi32(_mm256_madd_epi16(*xd,alpha_for_real),
+                                            output_shift);
+      const __m256i xim = _mm256_srai_epi32(_mm256_madd_epi16(*xd,alpha_for_im),
+                                            output_shift);
+      // a bit faster than unpacklo+unpackhi+packs
+      const __m256i tmp=_mm256_packs_epi32(xre,xim);
+      *yd=_mm256_shuffle_epi8(tmp,perm_mask);
+    }
+    c16_t* alpha16=(c16_t*) alpha, *yLast;
+    yLast=((c16_t*)y)+(N/8)*8;
+    for (c16_t* xTail=(c16_t*) end;
+         xTail<((c16_t*) x)+N;
+         xTail++, yLast++) {
+      *yLast=c16mulShift(*xTail,*alpha16,output_shift);
+    }
+  } else {
+#endif
+    // Multiply elementwise two complex vectors of N elements
+    // x - input 1 in the format |Re0 Im0 |,......,|Re(N-1) Im(N-1)|
+    // We assume x1 with a dynamic of 15 bit maximum
+    //
+    // alpha - input 2 in the format |Re0 Im0|
+    // We assume x2 with a dynamic of 15 bit maximum
+    //
+    // y - output in the format |Re0 Im0|,......,|Re(N-1) Im(N-1)|
+    //
+    // N - the size of the vectors (this function does N cpx mpy. WARNING: N>=4;
+    //
+    // log2_amp - increase the output amplitude by a factor 2^log2_amp (default is 0)
+    // WARNING: log2_amp>0 can cause overflow!!
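/* Illustration only (hypothetical helper, not in the patch): the scalar identity behind the
 * madd-based complex product used in the AVX2 loop above and in the SSE fallback below.
 * Packing alpha as |ar -ai| next to |ai ar| lets a single madd instruction compute Re and Im
 * as dot products of adjacent 16-bit pairs, producing 32-bit results that are then shifted
 * and saturation-packed back to 16 bits. */
static c32_t cpx_madd_ref(c16_t x, c16_t a)
{
  c32_t out;
  out.r = (int32_t)x.r * a.r + (int32_t)x.i * -a.i;  // madd of |xr xi| with |ar -ai|
  out.i = (int32_t)x.r * a.i + (int32_t)x.i *  a.r;  // madd of |xr xi| with |ai  ar|
  return out;
}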
+ + uint32_t i; // loop counter + + + simd_q15_t *y_128,alpha_128; + int32_t *xd=(int32_t *)x; #if defined(__x86_64__) || defined(__i386__) - __m128i shift = _mm_cvtsi32_si128(output_shift); - register simd_q15_t m0,m1,m2,m3; - - ((int16_t *)&alpha_128)[0] = alpha->r; - ((int16_t *)&alpha_128)[1] = -alpha->i; - ((int16_t *)&alpha_128)[2] = alpha->i; - ((int16_t *)&alpha_128)[3] = alpha->r; - ((int16_t *)&alpha_128)[4] = alpha->r; - ((int16_t *)&alpha_128)[5] = -alpha->i; - ((int16_t *)&alpha_128)[6] = alpha->i; - ((int16_t *)&alpha_128)[7] = alpha->r; + __m128i shift = _mm_cvtsi32_si128(output_shift); + register simd_q15_t m0,m1,m2,m3; + + ((int16_t *)&alpha_128)[0] = alpha->r; + ((int16_t *)&alpha_128)[1] = -alpha->i; + ((int16_t *)&alpha_128)[2] = alpha->i; + ((int16_t *)&alpha_128)[3] = alpha->r; + ((int16_t *)&alpha_128)[4] = alpha->r; + ((int16_t *)&alpha_128)[5] = -alpha->i; + ((int16_t *)&alpha_128)[6] = alpha->i; + ((int16_t *)&alpha_128)[7] = alpha->r; #elif defined(__arm__) - int32x4_t shift; - int32x4_t ab_re0,ab_re1,ab_im0,ab_im1,re32,im32; - int16_t reflip[8] __attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1}; - int32x4x2_t xtmp; - - ((int16_t *)&alpha_128)[0] = alpha->r; - ((int16_t *)&alpha_128)[1] = alpha->i; - ((int16_t *)&alpha_128)[2] = alpha->r; - ((int16_t *)&alpha_128)[3] = alpha->i; - ((int16_t *)&alpha_128)[4] = alpha->r; - ((int16_t *)&alpha_128)[5] = alpha->i; - ((int16_t *)&alpha_128)[6] = alpha->r; - ((int16_t *)&alpha_128)[7] = alpha->i; - int16x8_t bflip = vrev32q_s16(alpha_128); - int16x8_t bconj = vmulq_s16(alpha_128,*(int16x8_t *)reflip); - shift = vdupq_n_s32(-output_shift); + int32x4_t shift; + int32x4_t ab_re0,ab_re1,ab_im0,ab_im1,re32,im32; + int16_t reflip[8] __attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1}; + int32x4x2_t xtmp; + + ((int16_t *)&alpha_128)[0] = alpha->r; + ((int16_t *)&alpha_128)[1] = alpha->i; + ((int16_t *)&alpha_128)[2] = alpha->r; + ((int16_t *)&alpha_128)[3] = alpha->i; + ((int16_t *)&alpha_128)[4] = alpha->r; + ((int16_t *)&alpha_128)[5] = alpha->i; + ((int16_t *)&alpha_128)[6] = alpha->r; + ((int16_t *)&alpha_128)[7] = alpha->i; + int16x8_t bflip = vrev32q_s16(alpha_128); + int16x8_t bconj = vmulq_s16(alpha_128,*(int16x8_t *)reflip); + shift = vdupq_n_s32(-output_shift); #endif - y_128 = (simd_q15_t *) y; + y_128 = (simd_q15_t *) y; - for(i=0; i<N>>2; i++) { + for(i=0; i<N>>2; i++) { #if defined(__x86_64__) || defined(__i386__) - m0 = _mm_setr_epi32(xd[0],xd[0],xd[1],xd[1]); - m1 = _mm_setr_epi32(xd[2],xd[2],xd[3],xd[3]); - m2 = _mm_madd_epi16(m0,alpha_128); //complex multiply. result is 32bit [Re Im Re Im] - m3 = _mm_madd_epi16(m1,alpha_128); //complex multiply. result is 32bit [Re Im Re Im] - m2 = _mm_sra_epi32(m2,shift); // shift right by shift in order to compensate for the input amplitude - m3 = _mm_sra_epi32(m3,shift); // shift right by shift in order to compensate for the input amplitude - - y_128[0] = _mm_packs_epi32(m2,m3); // pack in 16bit integers with saturation [re im re im re im re im] - //print_ints("y_128[0]=", &y_128[0]); + m0 = _mm_setr_epi32(xd[0],xd[0],xd[1],xd[1]); + m1 = _mm_setr_epi32(xd[2],xd[2],xd[3],xd[3]); + m2 = _mm_madd_epi16(m0,alpha_128); //complex multiply. result is 32bit [Re Im Re Im] + m3 = _mm_madd_epi16(m1,alpha_128); //complex multiply. 
result is 32bit [Re Im Re Im] + m2 = _mm_sra_epi32(m2,shift); // shift right by shift in order to compensate for the input amplitude + m3 = _mm_sra_epi32(m3,shift); // shift right by shift in order to compensate for the input amplitude + + y_128[0] = _mm_packs_epi32(m2,m3); // pack in 16bit integers with saturation [re im re im re im re im] + //print_ints("y_128[0]=", &y_128[0]); #elif defined(__arm__) - ab_re0 = vmull_s16(((int16x4_t*)xd)[0],((int16x4_t*)&bconj)[0]); - ab_re1 = vmull_s16(((int16x4_t*)xd)[1],((int16x4_t*)&bconj)[1]); - ab_im0 = vmull_s16(((int16x4_t*)xd)[0],((int16x4_t*)&bflip)[0]); - ab_im1 = vmull_s16(((int16x4_t*)xd)[1],((int16x4_t*)&bflip)[1]); - re32 = vshlq_s32(vcombine_s32(vpadd_s32(((int32x2_t*)&ab_re0)[0],((int32x2_t*)&ab_re0)[1]), - vpadd_s32(((int32x2_t*)&ab_re1)[0],((int32x2_t*)&ab_re1)[1])), - shift); - im32 = vshlq_s32(vcombine_s32(vpadd_s32(((int32x2_t*)&ab_im0)[0],((int32x2_t*)&ab_im0)[1]), - vpadd_s32(((int32x2_t*)&ab_im1)[0],((int32x2_t*)&ab_im1)[1])), - shift); - - xtmp = vzipq_s32(re32,im32); + ab_re0 = vmull_s16(((int16x4_t*)xd)[0],((int16x4_t*)&bconj)[0]); + ab_re1 = vmull_s16(((int16x4_t*)xd)[1],((int16x4_t*)&bconj)[1]); + ab_im0 = vmull_s16(((int16x4_t*)xd)[0],((int16x4_t*)&bflip)[0]); + ab_im1 = vmull_s16(((int16x4_t*)xd)[1],((int16x4_t*)&bflip)[1]); + re32 = vshlq_s32(vcombine_s32(vpadd_s32(((int32x2_t*)&ab_re0)[0],((int32x2_t*)&ab_re0)[1]), + vpadd_s32(((int32x2_t*)&ab_re1)[0],((int32x2_t*)&ab_re1)[1])), + shift); + im32 = vshlq_s32(vcombine_s32(vpadd_s32(((int32x2_t*)&ab_im0)[0],((int32x2_t*)&ab_im0)[1]), + vpadd_s32(((int32x2_t*)&ab_im1)[0],((int32x2_t*)&ab_im1)[1])), + shift); + + xtmp = vzipq_s32(re32,im32); - y_128[0] = vcombine_s16(vmovn_s32(xtmp.val[0]),vmovn_s32(xtmp.val[1])); - -#endif + y_128[0] = vcombine_s16(vmovn_s32(xtmp.val[0]),vmovn_s32(xtmp.val[1])); - xd+=4; - y_128+=1; - } - - - _mm_empty(); - _m_empty(); - - return; -} #endif -/* -int mult_vector32_scalar(int16_t *x1, - int x2, - int16_t *y, - uint32_t N) -{ - // Multiply elementwise two real vectors of N elements - // x1 - input 1 in the format |Re(0) xxx Re(1) xxx|,......,|Re(N-2) xxx Re(N-1) xxx| - // We assume x1 with a dinamic of 31 bit maximum - // - // x1 - input 2 - // - // y - output in the format |Re0 (64bit) |,......,|Re(N-1) (64bit)| - // - // N - the size f the vectors (this function does N cpx mpy. 
WARNING: N>=4; - // - - uint32_t i; // loop counter - - __m128i *x1_128; - __m128i x2_128; - __m128i *y_128; - - - x1_128 = (__m128i *)&x1[0]; - x2_128 = _mm_setr_epi32(x2,0,x2,0); - y_128 = (__m128i *)&y[0]; - - - // we compute 4 cpx multiply for each loop - for(i=0; i<(N>>3); i++) { - y_128[0] = _mm_mul_epu32(x1_128[0],x2_128); - y_128[1] = _mm_mul_epu32(x1_128[1],x2_128); - y_128[2] = _mm_mul_epu32(x1_128[2],x2_128); - y_128[3] = _mm_mul_epu32(x1_128[3],x2_128); - - - x1_128+=4; - y_128 +=4; - } - - - _mm_empty(); - _m_empty(); - - return(0); -} -*/ - -int complex_conjugate(int16_t *x1, - int16_t *y, - uint32_t N) - -{ - uint32_t i; // loop counter - - simd_q15_t *x1_128; - simd_q15_t *y_128; - int16_t x2[8] __attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1}; - simd_q15_t *x2_128 = (simd_q15_t*)&x2[0]; - x1_128 = (simd_q15_t *)&x1[0]; - y_128 = (simd_q15_t *)&y[0]; - - - // we compute 4 cpx multiply for each loop - for(i=0; i<(N>>3); i++) { - y_128[0] = mullo_int16(x1_128[0],*x2_128); - y_128[1] = mullo_int16(x1_128[1],*x2_128); - y_128[2] = mullo_int16(x1_128[2],*x2_128); - y_128[3] = mullo_int16(x1_128[3],*x2_128); - - - x1_128+=4; - y_128 +=4; + xd+=4; + y_128+=1; + } } - - - _mm_empty(); - _m_empty(); - - return(0); } - #ifdef MAIN #define L 8 diff --git a/openair1/PHY/TOOLS/memory_routines.c b/openair1/PHY/TOOLS/memory_routines.c deleted file mode 100644 index cef9928988bd79cd809b0108810d65f3c74fa262..0000000000000000000000000000000000000000 --- a/openair1/PHY/TOOLS/memory_routines.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The OpenAirInterface Software Alliance licenses this file to You under - * the OAI Public License, Version 1.1 (the "License"); you may not use this file - * except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.openairinterface.org/?page_id=698 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
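/* memory_routines.c is deleted by this patch: its MMX helpers Zero_Buffer(), mmxcopy() and
 * Zero_Buffer_nommx() (listed just below) lose their prototypes in tools_defs.h further down,
 * and the one call in cadd_vv.c's MAIN test is replaced with memset() earlier in this diff.
 * Sketch only, for any out-of-tree caller; the helper names are made up and the lengths are
 * assumed to be byte counts, as in the deleted code. */
#include <string.h>

static inline void zero_buffer(void *buf, unsigned int length)
{
  memset(buf, 0, length);    /* covers Zero_Buffer() / Zero_Buffer_nommx() */
}

static inline void buffer_copy(void *dest, const void *src, int size)
{
  memcpy(dest, src, size);   /* covers mmxcopy() */
}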
- *------------------------------------------------------------------------------- - * For more information about the OpenAirInterface (OAI) Software Alliance: - * contact@openairinterface.org - */ - -#include "PHY/sse_intrin.h" - -void Zero_Buffer(void *buf,unsigned int length) -{ - // zeroes the mmx_t buffer 'buf' starting from buf[0] to buf[length-1] in bytes - int i; - register __m64 mm0; - __m64 *mbuf = (__m64 *)buf; - - // length>>=3; // put length in quadwords - mm0 = _m_pxor(mm0,mm0); // clear the register - - for(i=0; i<length>>3; i++) // for each i - mbuf[i] = mm0; // put 0 in buf[i] - - _mm_empty(); -} - -void mmxcopy(void *dest,void *src,int size) -{ - - // copy size bytes from src to dest - register int i; - register __m64 mm0; - __m64 *mmsrc = (__m64 *)src, *mmdest= (__m64 *)dest; - - - - for (i=0; i<size>>3; i++) { - mm0 = mmsrc[i]; - mmdest[i] = mm0; - } - - _mm_empty(); -} - -void Zero_Buffer_nommx(void *buf,unsigned int length) -{ - - int i; - - for (i=0; i<length>>2; i++) - ((int *)buf)[i] = 0; - -} - diff --git a/openair1/PHY/TOOLS/oai_dfts.c b/openair1/PHY/TOOLS/oai_dfts.c index c2b6a55210822fd4e066adf7888d4463309f164d..cbedfd815a47e7001762c3377c17eb933a9e8dc5 100644 --- a/openair1/PHY/TOOLS/oai_dfts.c +++ b/openair1/PHY/TOOLS/oai_dfts.c @@ -2575,7 +2575,6 @@ const static int16_t tw64c[96] __attribute__((aligned(32))) = { #define simd_q15_t __m128i #define simdshort_q15_t __m64 #define shiftright_int16(a,shift) _mm_srai_epi16(a,shift) -#define set1_int16(a) _mm_set1_epi16(a); #define mulhi_int16(a,b) _mm_mulhrs_epi16 (a,b) #ifdef __AVX2__ #define simd256_q15_t __m256i diff --git a/openair1/PHY/TOOLS/tools_defs.h b/openair1/PHY/TOOLS/tools_defs.h index c471978f461bb9387cac167b5e5ff8f308d8c5b1..6b81575feeb30458bf428c35e8b2b73d9ff9ad83 100644 --- a/openair1/PHY/TOOLS/tools_defs.h +++ b/openair1/PHY/TOOLS/tools_defs.h @@ -29,46 +29,199 @@ */ -#ifdef __cplusplus -extern "C" { -#endif - #include <stdio.h> #include <stdint.h> #include <assert.h> #include "PHY/sse_intrin.h" #include "common/utils/assertions.h" +#if defined(__x86_64__) || defined(__i386__) +#define simd_q15_t __m128i +#define simdshort_q15_t __m64 +#define shiftright_int16(a,shift) _mm_srai_epi16(a,shift) +#define set1_int16(a) _mm_set1_epi16(a) +#define mulhi_int16(a,b) _mm_mulhrs_epi16 (a,b) +#define mulhi_s1_int16(a,b) _mm_slli_epi16(_mm_mulhi_epi16(a,b),2) +#define adds_int16(a,b) _mm_adds_epi16(a,b) +#define mullo_int16(a,b) _mm_mullo_epi16(a,b) +#elif defined(__arm__) +#define simd_q15_t int16x8_t +#define simdshort_q15_t int16x4_t +#define shiftright_int16(a,shift) vshrq_n_s16(a,shift) +#define set1_int16(a) vdupq_n_s16(a) +#define mulhi_int16(a,b) vqdmulhq_s16(a,b) +#define mulhi_s1_int16(a,b) vshlq_n_s16(vqdmulhq_s16(a,b),1) +#define adds_int16(a,b) vqaddq_s16(a,b) +#define mullo_int16(a,b) vmulq_s16(a,b) +#define _mm_empty() +#define _m_empty() +#endif + +#ifdef __cplusplus +extern "C" { +#endif + #define CEILIDIV(a,b) ((a+b-1)/b) #define ROUNDIDIV(a,b) (((a<<1)+b)/(b<<1)) -typedef struct complexd { - double r; - double i; -} cd_t; - -typedef struct complexf { - float r; - float i; -} cf_t; - -typedef struct complex16 { - int16_t r; - int16_t i; -} c16_t; - -typedef struct complex32 { - int32_t r; - int32_t i; -} c32_t; + typedef struct complexd { + double r; + double i; + } cd_t; + + typedef struct complexf { + float r; + float i; + } cf_t; + + typedef struct complex16 { + int16_t r; + int16_t i; + } c16_t; + + typedef struct complex32 { + int32_t r; + int32_t i; + } c32_t; + + typedef struct 
complex64 { + int64_t r; + int64_t i; + } c64_t; + +#define squaredMod(a) ((a).r*(a).r + (a).i*(a).i) +#define csum(res, i1, i2) (res).r = (i1).r + (i2).r ; (res).i = (i1).i + (i2).i + + __attribute__((always_inline)) inline c16_t c16mulShift(const c16_t a, const c16_t b, const int Shift) { + return (c16_t) { + .r = (int16_t)((a.r * b.r - a.i * b.i) >> Shift), + .i = (int16_t)((a.r * b.i + a.i * b.r) >> Shift) + }; + } -typedef struct complex64 { - int64_t r; - int64_t i; -} c64_t; + __attribute__((always_inline)) inline c16_t c16divShift(const c16_t a, const c16_t b, const int Shift) { + return (c16_t) { + .r = (int16_t)((a.r * b.r + a.i * b.i) >> Shift), + .i = (int16_t)((a.r * b.i - a.i * b.r) >> Shift) + }; + } + + __attribute__((always_inline)) inline c16_t c16maddShift(const c16_t a, const c16_t b, c16_t c, const int Shift) { + return (c16_t) { + .r = (int16_t)(((a.r * b.r - a.i * b.i ) >> Shift) + c.r), + .i = (int16_t)(((a.r * b.i + a.i * b.r ) >> Shift) + c.i) + }; + } + + __attribute__((always_inline)) inline c32_t c32x16mulShift(const c16_t a, const c16_t b, const int Shift) { + return (c32_t) { + .r = (a.r * b.r - a.i * b.i) >> Shift, + .i = (a.r * b.i + a.i * b.r) >> Shift + }; + } + + __attribute__((always_inline)) inline c32_t c32x16maddShift(const c16_t a, const c16_t b, const c32_t c, const int Shift) { + return (c32_t) { + .r = ((a.r * b.r - a.i * b.i) >> Shift) + c.r, + .i = ((a.r * b.i + a.i * b.r) >> Shift) + c.i + }; + } + + __attribute__((always_inline)) inline c16_t c16x32div(const c32_t a, const int div) { + return (c16_t) { + .r = (int16_t)(a.r / div), + .i = (int16_t)(a.i / div) + }; + } + + + // On N complex numbers + // y.r += (x * alpha.r) >> 14 + // y.i += (x * alpha.i) >> 14 + // See regular C implementation at the end + __attribute__((always_inline)) inline void c16multaddVectRealComplex(const int16_t *x, + const c16_t *alpha, + c16_t *y, + const int N) { +#ifdef __AVX2__ + const int8_t makePairs[32] __attribute__((aligned(32)))={ + 0,1,0+16,1+16, + 2,3,2+16,3+16, + 4,5,4+16,5+16, + 6,7,6+16,7+16, + 8,9,8+16,9+16, + 10,11,10+16,11+16, + 12,13,12+16,13+16, + 14,15,14+16,15+16}; + + __m256i alpha256= _mm256_set1_epi32(*(int32_t *)alpha); + __m128i *x128=(__m128i *)x; + __m128i *y128=(__m128i *)y; + AssertFatal(N%8==0,"Not implemented\n"); + for (int i=0; i<N/8; i++) { + const __m256i xduplicate=_mm256_broadcastsi128_si256(*x128); + const __m256i x_duplicate_ordered=_mm256_shuffle_epi8(xduplicate,*(__m256i*)makePairs); + const __m256i x_mul_alpha_shift15 =_mm256_mulhrs_epi16(alpha256, x_duplicate_ordered); + // Existing multiplication normalization is weird, constant table in alpha need to be doubled + const __m256i x_mul_alpha_x2= _mm256_adds_epi16(x_mul_alpha_shift15,x_mul_alpha_shift15); + *y128= _mm_adds_epi16(_mm256_extracti128_si256(x_mul_alpha_x2,0),*y128); + y128++; + *y128= _mm_adds_epi16(_mm256_extracti128_si256(x_mul_alpha_x2,1),*y128); + y128++; + x128++; + } + +#elif defined(__x86_64__) || defined(__i386__) || defined(__arm__) + uint32_t i; + + // do 8 multiplications at a time + simd_q15_t alpha_r_128,alpha_i_128,yr,yi,*x_128=(simd_q15_t*)x,*y_128=(simd_q15_t*)y; + int j; + + // printf("alpha = %d,%d\n",alpha[0],alpha[1]); + alpha_r_128 = set1_int16(alpha->r); + alpha_i_128 = set1_int16(alpha->i); + + j=0; + + for (i=0; i<N>>3; i++) { + + yr = mulhi_s1_int16(alpha_r_128,x_128[i]); + yi = mulhi_s1_int16(alpha_i_128,x_128[i]); +#if defined(__x86_64__) || defined(__i386__) + y_128[j] = _mm_adds_epi16(y_128[j],_mm_unpacklo_epi16(yr,yi)); + j++; + 
y_128[j] = _mm_adds_epi16(y_128[j],_mm_unpackhi_epi16(yr,yi)); + j++; +#elif defined(__arm__) + int16x8x2_t yint; + yint = vzipq_s16(yr,yi); + y_128[j] = adds_int16(y_128[j],yint.val[0]); + j++; + y_128[j] = adds_int16(y_128[j],yint.val[1]); + + j++; +#endif + } -#define squaredMod(a) ((a).r*(a).r+(a).i*(a).i) -#define csum(res, i1, i2) (res).r=(i1).r+(i2).r ; (res).i=(i1).i+(i2).i +#else + for (int i=0; i<N; i++) { + int tmpr=y[i].r+((x[i]*alpha->r)>>14); + if (tmpr>INT16_MAX) + tmpr=INT16_MAX; + if (tmpr<INT16_MIN) + tmpr=INT16_MIN; + int tmpi=y[i].i+((x[i]*alpha->i)>>14); + if (tmpi>INT16_MAX) + tmpi=INT16_MAX; + if (tmpi<INT16_MIN) + tmpi=INT16_MIN; + y[i].r=(int16_t)tmpr; + y[i].i=(int16_t)tmpi; + } + +#endif + } //cmult_sv.h /*!\fn void multadd_real_vector_complex_scalar(int16_t *x,int16_t *alpha,int16_t *y,uint32_t N) @@ -85,10 +238,29 @@ void multadd_real_vector_complex_scalar(int16_t *x, int16_t *y, uint32_t N); -void multadd_real_four_symbols_vector_complex_scalar(int16_t *x, - int16_t *alpha, - int16_t *y); +__attribute__((always_inline)) inline void multadd_real_four_symbols_vector_complex_scalar(int16_t *x, + c16_t *alpha, + c16_t *y) +{ + + // do 8 multiplications at a time + simd_q15_t alpha_r_128,alpha_i_128,yr,yi,*x_128=(simd_q15_t*)x; + simd_q15_t y_128; + y_128 = _mm_loadu_si128((simd_q15_t*)y); + + alpha_r_128 = set1_int16(alpha->r); + alpha_i_128 = set1_int16(alpha->i); + + + yr = mulhi_s1_int16(alpha_r_128,x_128[0]); + yi = mulhi_s1_int16(alpha_i_128,x_128[0]); + y_128 = _mm_adds_epi16(y_128,_mm_unpacklo_epi16(yr,yi)); + y_128 = _mm_adds_epi16(y_128,_mm_unpackhi_epi16(yr,yi)); + _mm_storeu_si128((simd_q15_t*)y, y_128); + +} + /*!\fn void multadd_complex_vector_real_scalar(int16_t *x,int16_t alpha,int16_t *y,uint8_t zero_flag,uint32_t N) This function performs componentwise multiplication and accumulation of a real scalar and a complex vector. @param x Vector input (Q1.15) in the format |Re0 Im0|Re1 Im 1| ... @@ -114,7 +286,7 @@ void multadd_complex_vector_real_scalar(int16_t *x, //cmult_vv.c /*! - Multiply elementwise the complex conjugate of x1 with x2. + Multiply elementwise the complex conjugate of x1 with x2. 
@param x1 - input 1 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)| We assume x1 with a dinamic of 15 bit maximum @param x2 - input 2 in the format |Re0 Im0 Re1 Im1|,......,|Re(N-2) Im(N-2) Re(N-1) Im(N-1)| @@ -364,8 +536,8 @@ dft_size_idx_t get_dft(int ofdm_symbol_size) printf("function get_dft : unsupported ofdm symbol size \n"); assert(0); break; - } - return DFT_SIZE_IDXTABLESIZE; // never reached and will trigger assertion in idft function; + } + return DFT_SIZE_IDXTABLESIZE; // never reached and will trigger assertion in idft function; } typedef enum idft_size_idx { @@ -448,8 +620,8 @@ idft_size_idx_t get_idft(int ofdm_symbol_size) printf("function get_idft : unsupported ofdm symbol size \n"); assert(0); break; - } - return IDFT_SIZE_IDXTABLESIZE; // never reached and will trigger assertion in idft function + } + return IDFT_SIZE_IDXTABLESIZE; // never reached and will trigger assertion in idft function } @@ -491,58 +663,6 @@ int32_t sub_cpx_vector16(int16_t *x, int16_t *z, uint32_t N); -int32_t add_cpx_vector32(int16_t *x, - int16_t *y, - int16_t *z, - uint32_t N); - -int32_t add_real_vector64(int16_t *x, - int16_t *y, - int16_t *z, - uint32_t N); - -int32_t sub_real_vector64(int16_t *x, - int16_t *y, - int16_t *z, - uint32_t N); - -int32_t add_real_vector64_scalar(int16_t *x, - long long int a, - int16_t *y, - uint32_t N); - -/*!\fn int32_t add_vector16(int16_t *x,int16_t *y,int16_t *z,uint32_t N) -This function performs componentwise addition of two vectors with Q1.15 components. -@param x Vector input (Q1.15) -@param y Scalar input (Q1.15) -@param z Scalar output (Q1.15) -@param N Length of x WARNING: N must be a multiple of 32 - -The function implemented is : \f$\mathbf{z} = \mathbf{x} + \mathbf{y}\f$ -*/ -int32_t add_vector16(int16_t *x, - int16_t *y, - int16_t *z, - uint32_t N); - -int32_t add_vector16_64(int16_t *x, - int16_t *y, - int16_t *z, - uint32_t N); - -int32_t complex_conjugate(int16_t *x1, - int16_t *y, - uint32_t N); - -void bit8_txmux(int32_t length,int32_t offset); - -void bit8_rxdemux(int32_t length,int32_t offset); - -void Zero_Buffer(void *,uint32_t); -void Zero_Buffer_nommx(void *buf,uint32_t length); - -void mmxcopy(void *dest,void *src,int size); - /*!\fn int32_t signal_energy(int *,uint32_t); \brief Computes the signal energy per subcarrier */ @@ -631,7 +751,6 @@ int64_t dot_product64(int16_t *x, double interp(double x, double *xs, double *ys, int count); -int write_output(const char *fname,const char *vname,void *data,int length,int dec,char format); #ifdef __cplusplus } diff --git a/openair1/PHY/defs_eNB.h b/openair1/PHY/defs_eNB.h index 3a48dc51d66db3a0d4eba955513ade438a9bc6c2..d5ad705f4e0146da7d7b1bb8db0c7aff7caac2f5 100644 --- a/openair1/PHY/defs_eNB.h +++ b/openair1/PHY/defs_eNB.h @@ -181,9 +181,6 @@ typedef struct { typedef struct { - /// \brief ?. - /// first index: ? [0..1023] (hard coded) - int16_t *prachF; /// \brief ?. /// first index: ce_level [0..3] /// second index: rx antenna [0..63] (hard coded) \note Hard coded array size indexed by \c nb_antennas_rx. 
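/* A small usage sketch (not part of the patch) of the fixed-point complex helpers this patch
 * adds to tools_defs.h. The sample values and the standalone main() are made up, and building
 * it assumes the usual OAI include paths for tools_defs.h and its dependencies. */
#include <stdio.h>
#include "tools_defs.h"

int main(void)
{
  const c16_t a = {.r = 16384, .i = 0};      // 0.5 + 0.0j in Q1.15
  const c16_t b = {.r = 0, .i = 16384};      // 0.0 + 0.5j in Q1.15

  const c16_t prod = c16mulShift(a, b, 15);  // (a*b) >> 15 -> 0.25j, i.e. (0, 8192)
  c16_t sum;
  csum(sum, a, b);                           // componentwise add -> (16384, 16384)

  printf("prod=(%d,%d) sum=(%d,%d) |a|^2=%d\n",
         prod.r, prod.i, sum.r, sum.i, squaredMod(a));
  return 0;
}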
diff --git a/openair1/SCHED/prach_procedures.c b/openair1/SCHED/prach_procedures.c index 805354ce1be5ce7d7b7043f60e44482807fc78e0..fd78ed4bfb73eb913ca5dd197207ed9f74ec3e32 100644 --- a/openair1/SCHED/prach_procedures.c +++ b/openair1/SCHED/prach_procedures.c @@ -49,7 +49,7 @@ extern int oai_nfapi_rach_ind(nfapi_rach_indication_t *rach_ind); void prach_procedures(PHY_VARS_eNB *eNB, int br_flag ) { - uint16_t max_preamble[4],max_preamble_energy[4],max_preamble_delay[4],avg_preamble_energy[4]; + uint16_t max_preamble[4]={0},max_preamble_energy[4]={0},max_preamble_delay[4]={0},avg_preamble_energy[4]={0}; uint16_t i; int frame,subframe; diff --git a/openair3/ocp-gtpu/gtp_itf.cpp b/openair3/ocp-gtpu/gtp_itf.cpp index afef8ea4357ed5ea291d7a635ae6b26f0f363b8a..9779233538516b2c6d7004d9c45a0fc2db849762 100644 --- a/openair3/ocp-gtpu/gtp_itf.cpp +++ b/openair3/ocp-gtpu/gtp_itf.cpp @@ -145,7 +145,7 @@ class gtpEndPoints { ~gtpEndPoints() { // automatically close all sockets on quit - for (const auto p : instances) + for (const auto &p : instances) close(p.first); } };
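/* The prach_procedures() hunk above zero-initializes the four-element result arrays, so later
 * reads cannot hit indeterminate values even if a code path fills only some entries: "= {0}"
 * value-initializes every element of the aggregate, not just the first. Stand-alone sketch: */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
  uint16_t max_preamble_energy[4] = {0};  // all four entries start at 0

  for (int i = 0; i < 4; i++)
    printf("max_preamble_energy[%d] = %u\n", i, (unsigned)max_preamble_energy[i]);
  return 0;
}
/* Likewise, iterating gtpEndPoints::instances with "const auto &p" binds each element by
 * reference instead of copying the pair on every iteration of the destructor loop. */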