diff --git a/cmake_targets/CMakeLists.txt b/cmake_targets/CMakeLists.txt index 245bba4f8bc7c36d51baaf9f5f76a796bdef7574..df16bccfb55d308a0b915268fa9ed6b87ac6a657 100644 --- a/cmake_targets/CMakeLists.txt +++ b/cmake_targets/CMakeLists.txt @@ -1177,6 +1177,7 @@ set(PHY_SRC_UE ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/sss_ue.c ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/dlsch_llr_computation.c + ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/linear_preprocessing_rec.c ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/dlsch_decoding.c ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/dci_tools_ue.c ${OPENAIR1_DIR}/PHY/LTE_UE_TRANSPORT/uci_tools_ue.c @@ -1817,11 +1818,13 @@ endif() # So, here are some hacks here. Hope this gets fixed in future! if(EXISTS "/usr/include/atlas/cblas.h" OR EXISTS "/usr/include/cblas.h") include_directories("/usr/include/atlas") + LINK_DIRECTORIES("/usr/lib/lapack") LINK_DIRECTORIES("/usr/lib64") LINK_DIRECTORIES("/usr/lib64/atlas") #Added because atlas libraries in CentOS 7 are here! if(EXISTS "/usr/lib64/libblas.so" OR EXISTS "/usr/lib/libblas.so") #Case for CentOS7 list(APPEND ATLAS_LIBRARIES blas) + else() # Case for Ubuntu list(APPEND ATLAS_LIBRARIES cblas) endif() @@ -1847,6 +1850,8 @@ else() message("No Blas/Atlas libs found, some targets will fail") endif() +list(APPEND ATLAS_LIBRARIES lapack lapacke) + if (${XFORMS}) include_directories ("/usr/include/X11") set(XFORMS_SOURCE diff --git a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c index f82ba57e69e9bc2151e0b66b04cbee342872a42a..271151f56111aa05b63588db05afc70738f26358 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c +++ b/openair1/PHY/LTE_UE_TRANSPORT/dlsch_demodulation.c @@ -36,8 +36,18 @@ #include "transport_proto_ue.h" #include "PHY/sse_intrin.h" #include "T.h" +#include<stdio.h> +#include<math.h> +#include <stdlib.h> +#include <string.h> +#include <lapacke_utils.h> +#include <lapacke.h> +#include <cblas.h> +#include "linear_preprocessing_rec.h" #define NOCYGWIN_STATIC +//#define DEBUG_MMSE + /* dynamic shift for LLR computation for TM3/4 * set as command line argument, see lte-softmodem.c @@ -103,6 +113,7 @@ int rx_pdsch(PHY_VARS_UE *ue, int avg[4]; int avg_0[2]; int avg_1[2]; + unsigned short mmse_flag=0; #if UE_TIMING_TRACE uint8_t slot = 0; @@ -440,14 +451,15 @@ int rx_pdsch(PHY_VARS_UE *ue, ((dlsch0_harq->mimo_mode >=DUALSTREAM_UNIFORM_PRECODING1) && (dlsch0_harq->mimo_mode <=DUALSTREAM_PUSCH_PRECODING))) { - dlsch_channel_level_TM34(pdsch_vars[eNB_id]->dl_ch_estimates_ext, - frame_parms, - pdsch_vars[eNB_id]->pmi_ext, - avg_0, - avg_1, - symbol, - nb_rb, - dlsch0_harq->mimo_mode); + dlsch_channel_level_TM34(pdsch_vars[eNB_id]->dl_ch_estimates_ext, + frame_parms, + pdsch_vars[eNB_id]->pmi_ext, + avg_0, + avg_1, + symbol, + nb_rb, + mmse_flag, + dlsch0_harq->mimo_mode); LOG_D(PHY,"Channel Level TM34 avg_0 %d, avg_1 %d, rx_type %d, rx_standard %d, dlsch_demod_shift %d \n", avg_0[0], avg_1[0], rx_type, rx_standard, dlsch_demod_shift); @@ -547,6 +559,26 @@ int rx_pdsch(PHY_VARS_UE *ue, start_meas(&ue->generic_stat_bis[ue->current_thread_id[subframe]][slot]); #endif + if (rx_type==rx_IC_dual_stream && mmse_flag==1){ + + precode_channel_est(pdsch_vars[eNB_id]->dl_ch_estimates_ext, + frame_parms, + pdsch_vars[eNB_id], + symbol, + nb_rb, + dlsch0_harq->mimo_mode); + + mmse_processing_oai(pdsch_vars[eNB_id], + frame_parms, + measurements, + first_symbol_flag, + dlsch0_harq->mimo_mode, + mmse_flag, + 0.0, + symbol, + nb_rb); + } + // Now channel compensation if (dlsch0_harq->mimo_mode<LARGE_CDD) { dlsch_channel_compensation(pdsch_vars[eNB_id]->rxdataF_ext, @@ -607,6 +639,7 @@ int rx_pdsch(PHY_VARS_UE *ue, dlsch0_harq->round, dlsch0_harq->mimo_mode, nb_rb, + mmse_flag, pdsch_vars[eNB_id]->log2_maxh0, pdsch_vars[eNB_id]->log2_maxh1); if (symbol == 5) { @@ -2479,6 +2512,69 @@ void dlsch_channel_compensation_TM56(int **rxdataF_ext, _m_empty(); } +void precode_channel_est(int32_t **dl_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + LTE_UE_PDSCH *pdsch_vars, + unsigned char symbol, + unsigned short nb_rb, + MIMO_mode_t mimo_mode){ + + unsigned short rb; + __m128i *dl_ch0_128,*dl_ch1_128; + unsigned char aarx=0,symbol_mod,pilots=0; + unsigned char *pmi_ext = pdsch_vars->pmi_ext; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) + pilots=1; + + for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) { + + dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; // this is h11 + dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; // this is h12 + + for (rb=0; rb<nb_rb; rb++) { + if (mimo_mode==LARGE_CDD) { + prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) { + prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) { + prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) { + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else { + LOG_E(PHY,"Unknown MIMO mode\n"); + return; + } + if (pilots==0){ + dl_ch0_128+=3; + dl_ch1_128+=3; + }else { + dl_ch0_128+=2; + dl_ch1_128+=2; + } + } + + } +} + void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, LTE_UE_PDSCH *pdsch_vars, PHY_MEASUREMENTS *measurements, @@ -2490,6 +2586,7 @@ void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, int round, MIMO_mode_t mimo_mode, unsigned short nb_rb, + unsigned short mmse_flag, unsigned char output_shift0, unsigned char output_shift1) { @@ -2539,20 +2636,8 @@ void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) { - /* if (aarx==0) { - output_shift=output_shift0; - } - else { - output_shift=output_shift1; - } */ - - // printf("antenna %d\n", aarx); - // printf("symbol %d, rx antenna %d\n", symbol, aarx); - dl_ch0_128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12]; // this is h11 dl_ch1_128 = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12]; // this is h12 - - dl_ch_mag0_128 = (__m128i *)&dl_ch_mag0[aarx][symbol*frame_parms->N_RB_DL*12]; //responsible for x1 dl_ch_mag0_128b = (__m128i *)&dl_ch_magb0[aarx][symbol*frame_parms->N_RB_DL*12];//responsible for x1 dl_ch_mag1_128 = (__m128i *)&dl_ch_mag1[aarx][symbol*frame_parms->N_RB_DL*12]; //responsible for x2. always coming from tx2 @@ -2562,50 +2647,39 @@ void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, rxdataF_comp1_128 = (__m128i *)&rxdataF_comp1[aarx][symbol*frame_parms->N_RB_DL*12]; //result of multipl with MF x2 on antenna of interest for (rb=0; rb<nb_rb; rb++) { - + if (mmse_flag == 0) { // combine TX channels using precoder from pmi - if (mimo_mode==LARGE_CDD) { - prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]); - prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]); - - - if (pilots==0) { - prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]); - } - } - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) { - prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]); - prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]); - - if (pilots==0) { - prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]); - } - } - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) { - prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]); - prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]); - - if (pilots==0) { - prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]); - } - } - - else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) { - prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[0],&dl_ch1_128[0]); - prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[1],&dl_ch1_128[1]); - - if (pilots==0) { - prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[2],&dl_ch1_128[2]); + if (mimo_mode==LARGE_CDD) { + prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) { + prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) { + prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) { + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else { + LOG_E(PHY,"Unknown MIMO mode\n"); + return; } } - else { - LOG_E(PHY,"Unknown MIMO mode\n"); - return; - } - - if (mod_order0>2) { // get channel amplitude if not QPSK @@ -2732,7 +2806,7 @@ void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, // print_shorts("rx:",rxdataF128); // print_shorts("ch:",dl_ch0_128); - // print_shorts("pack:",rxdataF_comp0_128); + //print_shorts("pack:",rxdataF_comp0_128); // multiply by conjugated channel mmtmpD0 = _mm_madd_epi16(dl_ch0_128[1],rxdataF128[1]); @@ -2951,36 +3025,31 @@ void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, rxdataF_comp1_128 = (int16x8_t*)&rxdataF_comp1[aarx][symbol*frame_parms->N_RB_DL*12]; for (rb=0; rb<nb_rb; rb++) { - // combine TX channels using precoder from pmi - if (mimo_mode==LARGE_CDD) { - prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]); - prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]); - - - if (pilots==0) { - prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]); - } - } - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) { - prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]); - prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]); - - if (pilots==0) { - prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]); - } - } - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) { - prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]); - prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]); - - if (pilots==0) { - prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]); + if (mmse_flag == 0) { + // combine TX channels using precoder from pmi + if (mimo_mode==LARGE_CDD) { + prec2A_TM3_128(&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM3_128(&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM3_128(&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) { + prec2A_TM4_128(0,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(0,&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM4_128(0,&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) { + prec2A_TM4_128(1,&dl_ch0_128[0],&dl_ch1_128[0]); + prec2A_TM4_128(1,&dl_ch0_128[1],&dl_ch1_128[1]); + if (pilots==0) { + prec2A_TM4_128(1,&dl_ch0_128[2],&dl_ch1_128[2]); + } + }else { + LOG_E(PHY,"Unknown MIMO mode\n"); + return; } } - else { - LOG_E(PHY,"Unknown MIMO mode\n"); - return; - } if (mod_order0>2) { @@ -3869,7 +3938,592 @@ void dlsch_channel_level_core(int **dl_ch_estimates_ext, } -//compute average channel_level of effective (precoded) channel +void mmse_processing_oai(LTE_UE_PDSCH *pdsch_vars, + LTE_DL_FRAME_PARMS *frame_parms, + PHY_MEASUREMENTS *measurements, + unsigned char first_symbol_flag, + MIMO_mode_t mimo_mode, + unsigned short mmse_flag, + int noise_power, + unsigned char symbol, + unsigned short nb_rb){ + + int **rxdataF_ext = pdsch_vars->rxdataF_ext; + int **dl_ch_estimates_ext = pdsch_vars->dl_ch_estimates_ext; + unsigned char *pmi_ext = pdsch_vars->pmi_ext; + int avg_00[frame_parms->nb_antenna_ports_eNB*frame_parms->nb_antennas_rx]; + int avg_01[frame_parms->nb_antenna_ports_eNB*frame_parms->nb_antennas_rx]; + int symbol_mod, length, start_point, nre; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) + nre=8; + else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1)) + nre=10; + else + nre=12; + + length = nre*nb_rb; + start_point = symbol*nb_rb*12; + + + mmse_processing_core(rxdataF_ext, + dl_ch_estimates_ext, + noise_power, + frame_parms->nb_antenna_ports_eNB, + frame_parms->nb_antennas_rx, + length, + start_point); + + + /*dlsch_channel_aver_band(dl_ch_estimates_ext, + frame_parms, + chan_avg, + symbol, + nb_rb); + + + for (aatx=0; aatx<frame_parms->nb_antenna_ports_eNB; aatx++) + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + H[aatx*frame_parms->nb_antennas_rx + aarx] = (float)(chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].r/(32768.0)) + I*(float)(chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].i/(32768.0)); + // printf("H [%d] = (%f, %f) \n", aatx*frame_parms->nb_antennas_rx + aarx, creal(H[aatx*frame_parms->nb_antennas_rx + aarx]), cimag(H[aatx*frame_parms->nb_antennas_rx + aarx])); + }*/ + + if (first_symbol_flag == 1){ + dlsch_channel_level_TM34(dl_ch_estimates_ext, + frame_parms, + pmi_ext, + avg_00, + avg_01, + symbol, + nb_rb, + mmse_flag, + mimo_mode); + + avg_00[0] = (log2_approx(avg_00[0])/2) + dlsch_demod_shift+4;// + 2 ;//+ 4; + avg_01[0] = (log2_approx(avg_01[0])/2) + dlsch_demod_shift+4;// + 2 ;//+ 4; + pdsch_vars->log2_maxh0 = cmax(avg_00[0],0); + pdsch_vars->log2_maxh1 = cmax(avg_01[0],0); + } +} + +void mmse_processing_core(int32_t **rxdataF_ext, + int32_t **dl_ch_estimates_ext, + int noise_power, + int n_tx, + int n_rx, + int length, + int start_point){ + + int aatx, aarx, re; + float imag; + float real; + + + float complex **W_MMSE= malloc(n_tx*n_rx*sizeof(float complex*)); + for (int j=0; j<n_tx*n_rx; j++) { + W_MMSE[j] = malloc(sizeof(float complex)*length); + } + + float complex *H= malloc(n_tx*n_rx*sizeof(float complex)); + float complex *W_MMSE_re= malloc(n_tx*n_rx*sizeof(float complex)); + + float complex** dl_ch_estimates_ext_flcpx = malloc(n_tx*n_rx*sizeof(float complex*)); + for (int j=0; j<n_tx*n_rx; j++) { + dl_ch_estimates_ext_flcpx[j] = malloc(sizeof(float complex)*length); + } + + float complex** rxdataF_ext_flcpx = malloc(n_rx*sizeof(float complex*)); + for (int j=0; j<n_rx; j++) { + rxdataF_ext_flcpx[j] = malloc(sizeof(float complex)*length); + } + + chan_est_to_float(dl_ch_estimates_ext, + dl_ch_estimates_ext_flcpx, + n_tx, + n_rx, + length, + start_point); + + for (re=0; re<length; re++){ + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++) { + imag = cimag(dl_ch_estimates_ext_flcpx[aatx*n_rx + aarx][re]); + real = creal(dl_ch_estimates_ext_flcpx[aatx*n_rx + aarx][re]); + H[aatx*n_rx + aarx] = real+ I*imag; + } + } + compute_MMSE(H, n_tx, noise_power, W_MMSE_re); + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++) { + W_MMSE[aatx*n_rx + aarx][re] = W_MMSE_re[aatx*n_rx + aarx]; + } + } + } + + rxdataF_to_float(rxdataF_ext, + rxdataF_ext_flcpx, + n_rx, + length, + start_point); + + mult_mmse_rxdataF(W_MMSE, + rxdataF_ext_flcpx, + n_tx, + n_rx, + length, + start_point); + + + mult_mmse_chan_est(W_MMSE, + dl_ch_estimates_ext_flcpx, + n_tx, + n_rx, + length, + start_point); + + + float_to_rxdataF(rxdataF_ext, + rxdataF_ext_flcpx, + n_tx, + n_rx, + length, + start_point); + + + float_to_chan_est(dl_ch_estimates_ext, + dl_ch_estimates_ext_flcpx, + n_tx, + n_rx, + length, + start_point); + +free(W_MMSE); +free(H); +free(W_MMSE_re); +free(dl_ch_estimates_ext_flcpx); +free(rxdataF_ext_flcpx); + +} + + +/*THIS FUNCTION TAKES FLOAT_POINT INPUT. SHOULD NOT BE USED WITH OAI*/ +void mmse_processing_core_flp(float complex** rxdataF_ext_flcpx, + float complex **H, + int32_t **rxdataF_ext, + int32_t **dl_ch_estimates_ext, + float noise_power, + int n_tx, + int n_rx, + int length, + int start_point){ + + int aatx, aarx, re; + float max = 0; + float one_over_max = 0; + + float complex **W_MMSE= malloc(n_tx*n_rx*sizeof(float complex*)); + for (int j=0; j<n_tx*n_rx; j++) { + W_MMSE[j] = malloc(sizeof(float complex)*length); + } + + float complex *H_re= malloc(n_tx*n_rx*sizeof(float complex)); + float complex *W_MMSE_re= malloc(n_tx*n_rx*sizeof(float complex)); + + for (re=0; re<length; re++){ + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++) { + H_re[aatx*n_rx + aarx] = H[aatx*n_rx + aarx][re]; +#ifdef DEBUG_MMSE + if (re == 0) + printf(" H_re[%d]= (%f + i%f)\n", aatx*n_rx + aarx, creal(H_re[aatx*n_rx + aarx]), cimag(H_re[aatx*n_rx + aarx])); +#endif + } + } + compute_MMSE(H_re, n_tx, noise_power, W_MMSE_re); + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++) { + W_MMSE[aatx*n_rx + aarx][re] = W_MMSE_re[aatx*n_rx + aarx]; + if (fabs(creal(W_MMSE_re[aatx*n_rx + aarx])) > max) + max = fabs(creal(W_MMSE_re[aatx*n_rx + aarx])); + if (fabs(cimag(W_MMSE_re[aatx*n_rx + aarx])) > max) + max = fabs(cimag(W_MMSE_re[aatx*n_rx + aarx])); + } + } + } + one_over_max = 1.0/max; + + for (re=0; re<length; re++) + for (aatx=0; aatx<n_tx; aatx++) + for (aarx=0; aarx<n_rx; aarx++){ +#ifdef DEBUG_MMSE + if (re == 0) + printf(" W_MMSE[%d] = (%f + i%f)\n", aatx*n_rx + aarx, creal(W_MMSE[aatx*n_rx + aarx][re]), cimag(W_MMSE[aatx*n_rx + aarx][re])); +#endif + W_MMSE[aatx*n_rx + aarx][re] = one_over_max*W_MMSE[aatx*n_rx + aarx][re]; +#ifdef DEBUG_MMSE + if (re == 0) + printf(" AFTER NORM W_MMSE[%d] = (%f + i%f), max = %f \n", aatx*n_rx + aarx, creal(W_MMSE[aatx*n_rx + aarx][re]), cimag(W_MMSE[aatx*n_rx + aarx][re]), max); +#endif + } + + + mult_mmse_rxdataF(W_MMSE, + rxdataF_ext_flcpx, + n_tx, + n_rx, + length, + start_point); + + mult_mmse_chan_est(W_MMSE, + H, + n_tx, + n_rx, + length, + start_point); + + float_to_rxdataF(rxdataF_ext, + rxdataF_ext_flcpx, + n_tx, + n_rx, + length, + start_point); + + float_to_chan_est(dl_ch_estimates_ext, + H, + n_tx, + n_rx, + length, + start_point); + free(H_re); + free(W_MMSE); + free(W_MMSE_re); + +} + + +void dlsch_channel_aver_band(int **dl_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + struct complex32 *chan_avg, + unsigned char symbol, + unsigned short nb_rb) +{ + +#if defined(__x86_64__)||defined(__i386__) + + short rb; + unsigned char aatx,aarx,nre=12,symbol_mod; + __m128i *dl_ch128, avg128D; + int32_t chan_est_avg[4]; + + symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol; + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) + nre=8; + else if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB==1)) + nre=10; + else + nre=12; + + for (aatx=0; aatx<frame_parms->nb_antennas_tx; aatx++){ + for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) { + dl_ch128=(__m128i *)&dl_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx + aarx][symbol*frame_parms->N_RB_DL*12]; + avg128D = _mm_setzero_si128(); + // print_shorts("avg128D 1",&avg128D); + + for (rb=0;rb<nb_rb;rb++) { + /* printf("symbol %d, ant %d, nre*nrb %d, rb %d \n", symbol, aatx*frame_parms->nb_antennas_rx + aarx, nb_rb*nre, rb); + print_shorts("aver dl_ch128",&dl_ch128[0]); + print_shorts("aver dl_ch128",&dl_ch128[1]); + print_shorts("aver dl_ch128",&dl_ch128[2]); + avg128D = _mm_add_epi16(avg128D, dl_ch128[0]);*/ + //print_shorts("avg128D 2",&avg128D); + + avg128D = _mm_add_epi16(avg128D, dl_ch128[1]); + // print_shorts("avg128D 3",&avg128D); + + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) { + dl_ch128+=2; + }else { + avg128D = _mm_add_epi16(avg128D,dl_ch128[2]); + // print_shorts("avg128D 4",&avg128D); + dl_ch128+=3; + } + } + + chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].r =(((int16_t*)&avg128D)[0] + + ((int16_t*)&avg128D)[2] + + ((int16_t*)&avg128D)[4] + + ((int16_t*)&avg128D)[6])/(nb_rb*nre); + // printf("symb %d chan_avg re [%d] = %d\n", symbol, aatx*frame_parms->nb_antennas_rx + aarx, chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].r); + + + + chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].i =(((int16_t*)&avg128D)[1] + + ((int16_t*)&avg128D)[3] + + ((int16_t*)&avg128D)[5] + + ((int16_t*)&avg128D)[7])/(nb_rb*nre); + // printf("symb %d chan_avg im [%d] = %d\n", symbol, aatx*frame_parms->nb_antennas_rx + aarx, chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].i); + + //printf("symb %d chan_avg im [%d] = %d\n", symbol, aatx*frame_parms->nb_antennas_rx + aarx, chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].i); + + + chan_est_avg[aatx*frame_parms->nb_antennas_rx + aarx] = (((int32_t)chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].i)<<16)|(((int32_t)chan_avg[aatx*frame_parms->nb_antennas_rx + aarx].r) & 0xffff); + + //printf("symb %d chan_est_avg [%d] = %d\n", symbol, aatx*frame_parms->nb_antennas_rx + aarx, chan_est_avg[aatx*frame_parms->nb_antennas_rx + aarx]); + + dl_ch128=(__m128i *)&dl_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx + aarx][symbol*frame_parms->N_RB_DL*12]; + + for (rb=0;rb<nb_rb;rb++) { + dl_ch128[0] = _mm_set1_epi32(chan_est_avg[aatx*frame_parms->nb_antennas_rx + aarx]); + dl_ch128[1] = _mm_set1_epi32(chan_est_avg[aatx*frame_parms->nb_antennas_rx + aarx]); + if (((symbol_mod == 0) || (symbol_mod == (frame_parms->Ncp-1)))&&(frame_parms->nb_antenna_ports_eNB!=1)) { + dl_ch128+=2; + }else { + dl_ch128[2] = _mm_set1_epi32(chan_est_avg[aatx*frame_parms->nb_antennas_rx + aarx]); + dl_ch128+=3; + } + } + } + } + + _mm_empty(); + _m_empty(); + +#elif defined(__arm__) +#endif + } + +void rxdataF_to_float(int32_t **rxdataF_ext, + float complex **rxdataF_f, + int n_rx, + int length, + int start_point) +{ + short re; + int aarx; + int16_t imag; + int16_t real; + + for (aarx=0; aarx<n_rx; aarx++) { + for (re=0; re<length; re++){ + imag = (int16_t) (rxdataF_ext[aarx][start_point + re] >> 16); + real = (int16_t) (rxdataF_ext[aarx][start_point + re] & 0xffff); + rxdataF_f[aarx][re] = (float)(real/(32768.0)) + I*(float)(imag/(32768.0)); +#ifdef DEBUG_MMSE + if (re==0){ + printf("rxdataF_to_float: aarx = %d, real= %d, imag = %d\n", aarx, real, imag); + //printf("rxdataF_to_float: rxdataF_ext[%d][%d] = %d\n", aarx, start_point + re, rxdataF_ext[aarx][start_point + re]); + //printf("rxdataF_to_float: ant %d, re = %d, rxdataF_f real = %f, rxdataF_f imag = %f \n", aarx, re, creal(rxdataF_f[aarx][re]), cimag(rxdataF_f[aarx][re])); + } +#endif + } + } +} + + + +void chan_est_to_float(int32_t **dl_ch_estimates_ext, + float complex **dl_ch_estimates_ext_f, + int n_tx, + int n_rx, + int length, + int start_point) +{ + short re; + int aatx,aarx; + int16_t imag; + int16_t real; + + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++) { + for (re=0; re<length; re++){ + imag = (int16_t) (dl_ch_estimates_ext[aatx*n_rx + aarx][start_point + re] >> 16); + real = (int16_t) (dl_ch_estimates_ext[aatx*n_rx + aarx][start_point+ re] & 0xffff); + dl_ch_estimates_ext_f[aatx*n_rx + aarx][re] = (float)(real/(32768.0)) + I*(float)(imag/(32768.0)); +#ifdef DEBUG_MMSE + if (re==0){ + printf("ant %d, re = %d, real = %d, imag = %d \n", aatx*n_rx + aarx, re, real, imag); + printf("ant %d, re = %d, real = %f, imag = %f \n", aatx*n_rx + aarx, re, creal(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re]), cimag(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])); + } +#endif + } + } + } +} + +void float_to_chan_est(int32_t **dl_ch_estimates_ext, + float complex **dl_ch_estimates_ext_f, + int n_tx, + int n_rx, + int length, + int start_point) +{ + + short re; + int aarx, aatx; + int16_t imag; + int16_t real; + + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++) { + for (re=0; re<length; re++){ + if (cimag(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])<-1) + imag = 0x8000; + else if (cimag(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])>=1) + imag = 0x7FFF; + else + imag = cimag(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])*32768; + if (creal(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])<-1) + real = 0x8000; + else if (creal(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])>=1) + real = 0x7FFF; + else + real = creal(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])*32768; + + dl_ch_estimates_ext[aatx*n_rx + aarx][start_point + re] = (((int32_t)imag)<<16)|((int32_t)real & 0xffff); +#ifdef DEBUG_MMSE + if (re==0){ + printf(" float_to_chan_est: chan est real = %f, chan est imag = %f\n",creal(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re]), cimag(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])); + printf("float_to_chan_est: real fixed = %d, imag fixed = %d\n", real, imag); + printf("float_to_chan_est: ant %d, re = %d, dl_ch_estimates_ext = %d \n", aatx*n_rx + aarx, re, dl_ch_estimates_ext[aatx*n_rx + aarx][start_point + re]); + } +#endif + } + } + } +} + + +void float_to_rxdataF(int32_t **rxdataF_ext, + float complex **rxdataF_f, + int n_tx, + int n_rx, + int length, + int start_point) +{ + + short re; + int aarx; + int16_t imag; + int16_t real; + + for (aarx=0; aarx<n_rx; aarx++) { + for (re=0; re<length; re++){ + if (cimag(rxdataF_f[aarx][re])<-1) + imag = 0x8000; + else if (cimag(rxdataF_f[aarx][re])>=1) + imag = 0x7FFF; + else + imag = cimag(rxdataF_f[aarx][re])*32768; + if (creal(rxdataF_f[aarx][re])<-1) + real = 0x8000; + else if (creal(rxdataF_f[aarx][re])>=1) + real = 0x7FFF; + else + real = creal(rxdataF_f[aarx][re])*32768; + rxdataF_ext[aarx][start_point + re] = (((int32_t)imag)<<16)|(((int32_t)real) & 0xffff); +#ifdef DEBUG_MMSE + if (re==0){ + printf(" float_to_rxdataF: real = %f, imag = %f\n",creal(rxdataF_f[aarx][re]), cimag(rxdataF_f[aarx][re])); + printf("float_to_rxdataF: real fixed = %d, imag fixed = %d\n", real, imag); + printf("float_to_rxdataF: ant %d, re = %d, rxdataF_ext = %d \n", aarx, re, rxdataF_ext[aarx][symbol*nb_rb*12 + re]); + } +#endif + } + } +} + + +void mult_mmse_rxdataF(float complex** Wmmse, + float complex** rxdataF_ext_f, + int n_tx, + int n_rx, + int length, + int start_point) +{ + short re; + int aarx, aatx; + + + float complex* rxdata_re = malloc(n_rx*sizeof(float complex)); + float complex* rxdata_mmse_re = malloc(n_rx*sizeof(float complex)); + float complex* Wmmse_re = malloc(n_tx*n_rx*sizeof(float complex)); + + for (re=0;re<length; re++){ + for (aarx=0; aarx<n_rx; aarx++){ + rxdata_re[aarx] = rxdataF_ext_f[aarx][re]; +#ifdef DEBUG_MMSE + if (re==0) + printf("mult_mmse_rxdataF before: rxdata_re[%d] = (%f, %f)\n", aarx, creal(rxdata_re[aarx]), cimag(rxdata_re[aarx])); +#endif + } + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++){ + Wmmse_re[aatx*n_rx + aarx] = Wmmse[aatx*n_rx + aarx][re]; + } + } + mutl_matrix_matrix_col_based(Wmmse_re, rxdata_re, n_rx, n_tx, n_rx, 1, rxdata_mmse_re); + + for (aarx=0; aarx<n_rx; aarx++){ + rxdataF_ext_f[aarx][re] = rxdata_mmse_re[aarx]; +#ifdef DEBUG_MMSE + if (re==0) + printf("mult_mmse_rxdataF after: rxdataF_ext_f[%d] = (%f, %f)\n", aarx, creal(rxdataF_ext_f[aarx][re]), cimag(rxdataF_ext_f[aarx][re])); +#endif + } + } + + free(rxdata_re); + free(rxdata_mmse_re); + free(Wmmse_re); +} + +void mult_mmse_chan_est(float complex** Wmmse, + float complex** dl_ch_estimates_ext_f, + int n_tx, + int n_rx, + int length, + int start_point) +{ + short re; + int aarx, aatx; + + float complex* chan_est_re = malloc(n_tx*n_rx*sizeof(float complex)); + float complex* chan_est_mmse_re = malloc(n_tx*n_rx*sizeof(float complex)); + float complex* Wmmse_re = malloc(n_tx*n_rx*sizeof(float complex)); + + for (re=0;re<length; re++){ + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++){ + chan_est_re[aatx*n_rx + aarx] = dl_ch_estimates_ext_f[aatx*n_rx + aarx][re]; + Wmmse_re[aatx*n_rx + aarx] = Wmmse[aatx*n_rx + aarx][re]; +#ifdef DEBUG_MMSE + if (re==0) + printf("mult_mmse_chan_est: chan_est_re[%d] = (%f, %f)\n", aatx*n_rx + aarx, creal(chan_est_re[aatx*n_rx + aarx]), cimag(chan_est_re[aatx*n_rx + aarx])); +#endif + } + } + mutl_matrix_matrix_col_based(Wmmse_re, chan_est_re, n_rx, n_tx, n_rx, n_tx, chan_est_mmse_re); + for (aatx=0; aatx<n_tx; aatx++){ + for (aarx=0; aarx<n_rx; aarx++){ + dl_ch_estimates_ext_f[aatx*n_rx + aarx][re] = chan_est_mmse_re[aatx*n_rx + aarx]; +#ifdef DEBUG_MMSE + if (re==0) + printf("mult_mmse_chan_est: dl_ch_estimates_ext_f[%d][%d] = (%f, %f)\n", aatx*n_rx + aarx, re, creal(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re]), cimag(dl_ch_estimates_ext_f[aatx*n_rx + aarx][re])); +#endif + } + } + } + free(Wmmse_re); + free(chan_est_re); + free(chan_est_mmse_re); +} + + + + //compute average channel_level of effective (precoded) channel void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, @@ -3879,11 +4533,11 @@ void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, int *avg_1, uint8_t symbol, unsigned short nb_rb, + unsigned int mmse_flag, MIMO_mode_t mimo_mode){ #if defined(__x86_64__)||defined(__i386__) - short rb; unsigned char aarx,nre=12,symbol_mod; __m128i *dl_ch0_128,*dl_ch1_128, dl_ch0_128_tmp, dl_ch1_128_tmp, avg_0_128D, avg_1_128D; @@ -3914,19 +4568,21 @@ void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, avg_1_128D = _mm_setzero_si128(); for (rb=0; rb<nb_rb; rb++) { // printf("rb %d : \n",rb); - // print_shorts("ch0\n",&dl_ch0_128[0]); - //print_shorts("ch1\n",&dl_ch1_128[0]); + //print_shorts("ch0\n",&dl_ch0_128[0]); + //print_shorts("ch1\n",&dl_ch1_128[0]); dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[0]); dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[0]); - if (mimo_mode==LARGE_CDD) - prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) - prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) - prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) - prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + if (mmse_flag == 0){ + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + } // mmtmpD0 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); avg_0_128D = _mm_add_epi32(avg_0_128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); @@ -3936,14 +4592,16 @@ void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[1]); dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[1]); - if (mimo_mode==LARGE_CDD) - prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) - prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) - prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) - prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + if (mmse_flag == 0){ + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + } // mmtmpD1 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); avg_0_128D = _mm_add_epi32(avg_0_128D,_mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp)); @@ -3958,14 +4616,16 @@ void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, dl_ch0_128_tmp = _mm_load_si128(&dl_ch0_128[2]); dl_ch1_128_tmp = _mm_load_si128(&dl_ch1_128[2]); - if (mimo_mode==LARGE_CDD) - prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) - prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) - prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); - else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) - prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + if (mmse_flag == 0){ + if (mimo_mode==LARGE_CDD) + prec2A_TM3_128(&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODING1) + prec2A_TM4_128(0,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_UNIFORM_PRECODINGj) + prec2A_TM4_128(1,&dl_ch0_128_tmp,&dl_ch1_128_tmp); + else if (mimo_mode==DUALSTREAM_PUSCH_PRECODING) + prec2A_TM4_128(pmi_ext[rb],&dl_ch0_128_tmp,&dl_ch1_128_tmp); + } // mmtmpD2 = _mm_madd_epi16(dl_ch0_128_tmp,dl_ch0_128_tmp); avg_1_128D = _mm_add_epi32(avg_1_128D,_mm_madd_epi16(dl_ch1_128_tmp,dl_ch1_128_tmp)); diff --git a/openair1/PHY/LTE_UE_TRANSPORT/linear_preprocessing_rec.c b/openair1/PHY/LTE_UE_TRANSPORT/linear_preprocessing_rec.c new file mode 100644 index 0000000000000000000000000000000000000000..d85d440193757044eedadaf820ef4d46b47eddb6 --- /dev/null +++ b/openair1/PHY/LTE_UE_TRANSPORT/linear_preprocessing_rec.c @@ -0,0 +1,369 @@ +/* These functions compute linear preprocessing for +the UE using LAPACKE and CBLAS modules of +LAPACK libraries. +MMSE and MMSE whitening filters are available. +Functions are using RowMajor storage of the +matrices, like in conventional C. Traditional +Fortran functions of LAPACK employ ColumnMajor +data storage. */ + +#include<stdio.h> +#include<math.h> +#include<complex.h> +#include <stdlib.h> +#include <cblas.h> +#include <string.h> +#include <lapacke_utils.h> +#include <lapacke.h> + +//#define DEBUG_PREPROC + + +void transpose (int N, float complex *A, float complex *Result) +{ + // COnputes C := alpha*op(A)*op(B) + beta*C, + enum CBLAS_TRANSPOSE transa = CblasTrans; + enum CBLAS_TRANSPOSE transb = CblasNoTrans; + int rows_opA = N; // number of rows in op(A) and in C + int col_opB = N; //number of columns of op(B) and in C + int col_opA = N; //number of columns in op(A) and rows in op(B) + int col_B; //number of columns in B + float complex alpha = 1.0+I*0; + int lda = rows_opA; + float complex beta = 0.0+I*0; + int ldc = rows_opA; + int i; + float complex* B; + + int ldb = col_opB; + + if (transb == CblasNoTrans) { + B = (float complex*)calloc(ldb*col_opB,sizeof(float complex)); + col_B= col_opB; + } + else { + B = (float complex*)calloc(ldb*col_opA, sizeof(float complex)); + col_B = col_opA; + } + float complex* C = (float complex*)malloc(ldc*col_opB*sizeof(float complex)); + + for (i=0; i<lda*col_B; i+=N+1) + B[i]=1.0+I*0; + + cblas_cgemm(CblasRowMajor, transa, transb, rows_opA, col_opB, col_opA, &alpha, A, lda, B, ldb, &beta, C, ldc); + + memcpy(Result, C, N*N*sizeof(float complex)); + + free(B); + free(C); + } + + +void conjugate_transpose (int N, float complex *A, float complex *Result) +{ + // Computes C := alpha*op(A)*op(B) + beta*C, + enum CBLAS_TRANSPOSE transa = CblasConjTrans; + enum CBLAS_TRANSPOSE transb = CblasNoTrans; + int rows_opA = N; // number of rows in op(A) and in C + int col_opB = N; //number of columns of op(B) and in C + int col_opA = N; //number of columns in op(A) and rows in op(B) + int col_B; //number of columns in B + float complex alpha = 1.0+I*0; + int lda = rows_opA; + float complex beta = 0.0+I*0; + int ldc = rows_opA; + int i; + float complex* B; + int ldb = col_opB; + + if (transb == CblasNoTrans) { + B = (float complex*)calloc(ldb*col_opB,sizeof(float complex)); + col_B= col_opB; + } + else { + B = (float complex*)calloc(ldb*col_opA, sizeof(float complex)); + col_B = col_opA; + } + float complex* C = (float complex*)malloc(ldc*col_opB*sizeof(float complex)); + + for (i=0; i<lda*col_B; i+=N+1) + B[i]=1.0+I*0; + + cblas_cgemm(CblasRowMajor, transa, transb, rows_opA, col_opB, col_opA, &alpha, A, lda, B, ldb, &beta, C, ldc); + + memcpy(Result, C, N*N*sizeof(float complex)); + + free(B); + free(C); + } + +void H_hermH_plus_sigma2I (int N, int M, float complex *A, float sigma2, float complex *Result) +{ + //C := alpha*op(A)*op(B) + beta*C, + enum CBLAS_TRANSPOSE transa = CblasConjTrans; + enum CBLAS_TRANSPOSE transb = CblasNoTrans; + int rows_opA = N; // number of rows in op(A) and in C + int col_opB = N; //number of columns of op(B) and in C + int col_opA = N; //number of columns in op(A) and rows in op(B) + int col_C = N; //number of columns in B + float complex alpha = 1.0+I*0; + int lda = col_opA; + float complex beta = 1.0 + I*0; + int ldc = col_opA; + int i; + + float complex* C = (float complex*)calloc(ldc*col_opB, sizeof(float complex)); + + for (i=0; i<lda*col_C; i+=N+1) + C[i]=sigma2*(1.0+I*0); + + cblas_cgemm(CblasRowMajor, transa, transb, rows_opA, col_opB, col_opA, &alpha, A, lda, A, lda, &beta, C, ldc); + + memcpy(Result, C, N*M*sizeof(float complex)); + free(C); + + } + + void HH_herm_plus_sigma2I (int M, int N, float complex *A, float sigma2, float complex *Result) +{ + //C := alpha*op(A)*op(B) + beta*C, + enum CBLAS_TRANSPOSE transa = CblasNoTrans; + enum CBLAS_TRANSPOSE transb = CblasConjTrans; + int k = N; //number of columns in op(A) and rows in op(B),k + float complex alpha = 1.0+I*0; + int lda = N; + int ldb = N; + int ldc = M; + int i; + + float complex* C = (float complex*)calloc(M*M, sizeof(float complex)); + + for (i=0; i<M*M; i+=M+1) + C[i]=1.0+I*0; + + cblas_cgemm(CblasRowMajor, transa, transb, M, M, k, &alpha, A, lda, A, ldb, &sigma2, C, ldc); + + memcpy(Result, C, M*M*sizeof(float complex)); + free(C); + +} + +void eigen_vectors_values (int N, float complex *A, float complex *Vectors, float *Values_Matrix) +{ + // This function computes ORTHONORMAL eigenvectors and eigenvalues of matrix A, + // where Values_Matrix is a diagonal matrix of eigenvalues. + // A=Vectors*Values_Matrix*Vectors' + char jobz = 'V'; + char uplo = 'U'; + int order_A = N; + int lda = N; + int i; + float* Values = (float*)malloc(sizeof(float)*1*N); + + LAPACKE_cheev(LAPACK_ROW_MAJOR, jobz, uplo, order_A, A, lda, Values); + + memcpy(Vectors, A, N*N*sizeof(float complex)); + + for (i=0; i<lda; i+=1) + Values_Matrix[i*(lda+1)]=Values[i]; + + free(Values); +} + + void lin_eq_solver (int N, float complex* A, float complex* B, float complex* Result) +{ + int n = N; + int lda = N; + int ldb = N; + int nrhs = N; + + char transa = 'N'; + int* IPIV = malloc(N*N*sizeof(int)); + + // Compute LU-factorization + LAPACKE_cgetrf(LAPACK_ROW_MAJOR, n, nrhs, A, lda, IPIV); + + // Solve AX=B + LAPACKE_cgetrs(LAPACK_ROW_MAJOR, transa, n, nrhs, A, lda, IPIV, B, ldb); + + // cgetrs( "N", N, 4, A, lda, IPIV, B, ldb, INFO ) + + memcpy(Result, B, N*N*sizeof(float complex)); + + free(IPIV); + +} + +void mutl_matrix_matrix_row_based(float complex* M0, float complex* M1, int rows_M0, int col_M0, int rows_M1, int col_M1, float complex* Result ){ + enum CBLAS_TRANSPOSE transa = CblasNoTrans; + enum CBLAS_TRANSPOSE transb = CblasNoTrans; + int rows_opA = rows_M0; // number of rows in op(A) and in C + int col_opB = col_M1; //number of columns of op(B) and in C + int col_opA = col_M0; //number of columns in op(A) and rows in op(B) + float complex alpha =1.0; + int lda = col_M0; + float complex beta = 0.0; + int ldc = col_M1; + int ldb = col_M1; + +#ifdef DEBUG_PREPROC + int i=0; + printf("rows_M0 %d, col_M0 %d, rows_M1 %d, col_M1 %d\n", rows_M0, col_M0, rows_M1, col_M1); + + for(i=0; i<rows_M0*col_M0; ++i) + printf(" rows_opA = %d, col_opB = %d, W_MMSE[%d] = (%f + i%f)\n", rows_opA, col_opB, i , creal(M0[i]), cimag(M0[i])); + + for(i=0; i<rows_M1*col_M1; ++i) + printf(" M1[%d] = (%f + i%f)\n", i , creal(M1[i]), cimag(M1[i])); +#endif + + cblas_cgemm(CblasRowMajor, transa, transb, rows_opA, col_opB, col_opA, &alpha, M0, lda, M1, ldb, &beta, Result, ldc); + +#ifdef DEBUG_PREPROC + for(i=0; i<rows_opA*col_opB; ++i) + printf(" result[%d] = (%f + i%f)\n", i , creal(Result[i]), cimag(Result[i])); +#endif + +} +void mutl_matrix_matrix_col_based(float complex* M0, float complex* M1, int rows_M0, int col_M0, int rows_M1, int col_M1, float complex* Result ){ + enum CBLAS_TRANSPOSE transa = CblasNoTrans; + enum CBLAS_TRANSPOSE transb = CblasNoTrans; + int rows_opA = rows_M0; // number of rows in op(A) and in C + int col_opB = col_M1; //number of columns of op(B) and in C + int col_opA = col_M0; //number of columns in op(A) and rows in op(B) + float complex alpha =1.0; + int lda = col_M0; + float complex beta = 0.0; + int ldc = rows_M1; + int ldb = rows_M1; + +#ifdef DEBUG_PREPROC + int i = 0; + printf("rows_M0 %d, col_M0 %d, rows_M1 %d, col_M1 %d\n", rows_M0, col_M0, rows_M1, col_M1); + + for(i=0; i<rows_M0*col_M0; ++i) + printf(" rows_opA = %d, col_opB = %d, W_MMSE[%d] = (%f + i%f)\n", rows_opA, col_opB, i , creal(M0[i]), cimag(M0[i])); + + + for(i=0; i<rows_M1*col_M1; ++i) + printf(" M1[%d] = (%f + i%f)\n", i , creal(M1[i]), cimag(M1[i])); +#endif + + cblas_cgemm(CblasColMajor, transa, transb, rows_opA, col_opB, col_opA, &alpha, M0, lda, M1, ldb, &beta, Result, ldc); + +#ifdef DEBUG_PREPROC + for(i=0; i<rows_opA*col_opB; ++i) + printf(" result[%d] = (%f + i%f)\n", i , creal(Result[i]), cimag(Result[i])); +#endif +} + + +/*FILTERS */ +void compute_MMSE(float complex* H, int order_H, float sigma2, float complex* W_MMSE) +{ + int N = order_H; + float complex* H_hermH_sigmaI = malloc(N*N*sizeof(float complex)); + float complex* H_herm = malloc(N*N*sizeof(float complex)); + + H_hermH_plus_sigma2I(N, N, H, sigma2, H_hermH_sigmaI); + +#ifdef DEBUG_PREPROC + int i =0; + for(i=0;i<N*N;i++) + printf(" H_hermH_sigmaI[%d] = (%f + i%f)\n", i , creal(H_hermH_sigmaI[i]), cimag(H_hermH_sigmaI[i])); +#endif + + conjugate_transpose (N, H, H_herm); //equals H_herm + +#ifdef DEBUG_PREPROC + for(i=0;i<N*N;i++) + printf(" H_herm[%d] = (%f + i%f)\n", i , creal(H_herm[i]), cimag(H_herm[i])); +#endif + + lin_eq_solver(N, H_hermH_sigmaI, H_herm, W_MMSE); + +#ifdef DEBUG_PREPROC + for(i=0;i<N*N;i++) + printf(" W_MMSE[%d] = (%f + i%f)\n", i , creal(W_MMSE[i]), cimag(W_MMSE[i])); +#endif + + free(H_hermH_sigmaI); + free(H_herm); +} + +#if 0 +void compute_white_filter(float complex* H_re, + int order_H, + float sigma2, + float complex* W_Wh_0_re, + float complex* W_Wh_1_re){ + + int aatx, aarx, re; + int i,j; + int M =n_rx; + int N = n_tx; + int sigma2=noise_power; + + float complex *H0_re = malloc(n_rx*(n_tx>>2)*sizeof(float complex)); + float complex *H1_re = malloc(n_rx*(n_tx>>2)*sizeof(float complex)); + float complex *R_corr_col_n_0_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *R_corr_col_n_1_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *U_0_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *U_1_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *U_0_herm_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *U_1_herm_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *D_0_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *D_1_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *W_Wh_0_re = malloc(n_rx*n_tx*sizeof(float complex)); + float complex *W_Wh_1_re = malloc(n_rx*n_tx*sizeof(float complex)); + + for (aatx=0; aatx<n_tx/2; aatx++){ + for (aarx=0; aarx<n_rx; aarx++) { + H0_re[aatx*n_rx + aarx] = H_re[aatx*n_rx + aarx][re]; // H0 gets [0 1 2 3; 4,5,6,7].' coefficients of H + H1_re[aatx*n_rx + aarx] = H_re[aatx*n_rx + aarx + 8][re]; // H1 gets [8 9 10 11; 12, 13, 14, 15].' coefficients of H + if (re == 0) + printf("ant %d, H_re = (%f + i%f) \n", aatx*n_rx + aarx, creal(H[aatx*n_rx + aarx][re]), cimag(H[aatx*n_rx + aarx][re])); + } + } + + + //HH_herm_plus_sigma2I(n_rx, (n_tx>>2), H1_re, sigma2, R_corr_col_n_0_re); + HH_herm_plus_sigma2I(n_rx, (n_tx>>2), H0_re, sigma2, R_corr_col_n_1_re); + + eigen_vectors_values(n_rx, R_corr_col_n_0_re, U_0_re, D_0_re); + eigen_vectors_values(n_rx, R_corr_col_n_1_re, U_1_re, D_1_re); + + transpose (n_rx, U_0_re, U_0_herm_re); + transpose (n_rx, U_1_re, U_1_herm_re); + + sigma = (float)(sqrt((double)(sigma2))); + + /*The inverse of a diagonal matrix is obtained by replacing each element in the diagonal with its reciprocal. + A square root of a diagonal matrix is given by the diagonal matrix, whose diagonal entries are just the square + roots of the original matrix.*/ + + + D_0_re_inv_sqrt[0] = sqrt_float(1/D_0_re_inv[0]); + D_0_re_inv_sqrt[5] = sqrt_float(1/D_0_re_inv[5]); + D_0_re_inv_sqrt[10] = sqrt_float(1/D_0_re_inv[10]); + D_0_re_inv_sqrt[15] = sqrt_float(1/D_0_re_inv[15]); + + D_1_re_inv[0] = sqrt_float(1/D_1_re_inv[0]); + D_1_re_inv[5] = sqrt_float(1/D_1_re_inv[5]); + D_1_re_inv[10] = sqrt_float(1/D_1_re_inv[10]); + D_1_re_inv[15] = sqrt_float(1/D_1_re_inv[15]); + + now only to multiply + + free(H0); + free(H1); + free(R_corr_col_n_0); + free(R_corr_col_n_1); +} +#endif + +float sqrt_float(float x, float sqrt_x) +{ + sqrt_x = (float)(sqrt((double)(x))); + return sqrt_x; +} \ No newline at end of file diff --git a/openair1/PHY/LTE_UE_TRANSPORT/linear_preprocessing_rec.h b/openair1/PHY/LTE_UE_TRANSPORT/linear_preprocessing_rec.h new file mode 100755 index 0000000000000000000000000000000000000000..f38bc475567633a97fd55e3ad4cccc4c3c964ace --- /dev/null +++ b/openair1/PHY/LTE_UE_TRANSPORT/linear_preprocessing_rec.h @@ -0,0 +1,121 @@ + +#include<stdio.h> +#include<math.h> +#include<complex.h> +#include <stdlib.h> +#include "PHY/defs_UE.h" + + +/* FUNCTIONS FOR LINEAR PREPROCESSING: MMSE, WHITENNING, etc*/ +void transpose(int N, float complex *A, float complex *Result); + +void conjugate_transpose(int N, float complex *A, float complex *Result); + +void H_hermH_plus_sigma2I(int N, int M, float complex *A, float sigma2, float complex *Result); + +void HH_herm_plus_sigma2I(int M, int N, float complex *A, float sigma2, float complex *Result); + +void eigen_vectors_values(int N, float complex *A, float complex *Vectors, float *Values_Matrix); + +void lin_eq_solver(int N, float complex *A, float complex* B); +//float complex* lin_eq_solver (int N, float complex* A, float complex* B); + +/* mutl_matrix_matrix_row_based performs multiplications when matrix is row-oriented H[0], H[1]; H[2], H[3]*/ +void mutl_matrix_matrix_row_based(float complex* M0, float complex* M1, int rows_M0, int col_M0, int rows_M1, int col_M1, float complex* Result ); + +/* mutl_matrix_matrix_col_based performs multiplications matrix is column-oriented H[0], H[2]; H[1], H[3]*/ +void mutl_matrix_matrix_col_based(float complex* M0, float complex* M1, int rows_M0, int col_M0, int rows_M1, int col_M1, float complex* Result ); + +void compute_MMSE(float complex* H, int order_H, float sigma2, float complex* W_MMSE); + +void compute_white_filter(float complex* H, int order_H, float sigma2, float complex* U_1, float complex* D_1); + +void mmse_processing_oai(LTE_UE_PDSCH *pdsch_vars, + LTE_DL_FRAME_PARMS *frame_parms, + PHY_MEASUREMENTS *measurements, + unsigned char first_symbol_flag, + MIMO_mode_t mimo_mode, + unsigned short mmse_flag, + int noise_power, + unsigned char symbol, + unsigned short nb_rb); + + +void precode_channel_est(int32_t **dl_ch_estimates_ext, + LTE_DL_FRAME_PARMS *frame_parms, + LTE_UE_PDSCH *pdsch_vars, + unsigned char symbol, + unsigned short nb_rb, + MIMO_mode_t mimo_mode); + + +void rxdataF_to_float(int32_t **rxdataF_ext, + float complex **rxdataF_f, + int n_rx, + int length, + int start_point); + +void chan_est_to_float(int32_t **dl_ch_estimates_ext, + float complex **dl_ch_estimates_ext_f, + int n_tx, + int n_rx, + int length, + int start_point); + +void float_to_chan_est(int32_t **dl_ch_estimates_ext, + float complex **dl_ch_estimates_ext_f, + int n_tx, + int n_rx, + int length, + int start_point); + +void float_to_rxdataF(int32_t **rxdataF_ext, + float complex **rxdataF_f, + int n_tx, + int n_rx, + int length, + int start_point); + +void mult_mmse_rxdataF(float complex** Wmmse, + float complex** rxdataF_ext_f, + int n_tx, + int n_rx, + int length, + int start_point); + +void mult_mmse_chan_est(float complex** Wmmse, + float complex** dl_ch_estimates_ext_f, + int n_tx, + int n_rx, + int length, + int start_point); + +void mmse_processing_core(int32_t **rxdataF_ext, + int32_t **dl_ch_estimates_ext, + int sigma2, + int n_tx, + int n_rx, + int length, + int start_point); + +void mmse_processing_core_flp(float complex** rxdataF_ext_flcpx, + float complex **H, + int32_t **rxdataF_ext, + int32_t **dl_ch_estimates_ext, + float sigma2, + int n_tx, + int n_rx, + int length, + int start_point); + +void whitening_processing_core_flp(float complex** rxdataF_ext_flcpx, + float complex **H, + int32_t **rxdataF_ext, + int32_t **dl_ch_estimates_ext, + float sigma2, + int n_tx, + int n_rx, + int length, + int start_point); + +float sqrt_float(float x, float sqrt_x); diff --git a/openair1/PHY/LTE_UE_TRANSPORT/transport_proto_ue.h b/openair1/PHY/LTE_UE_TRANSPORT/transport_proto_ue.h index 178ed53dcc4441b9a06d65ebd5e616c109301d9a..53f593fe295f165364b7fc6dde689328893b5a08 100644 --- a/openair1/PHY/LTE_UE_TRANSPORT/transport_proto_ue.h +++ b/openair1/PHY/LTE_UE_TRANSPORT/transport_proto_ue.h @@ -925,6 +925,7 @@ void dlsch_channel_compensation_TM34(LTE_DL_FRAME_PARMS *frame_parms, int round, MIMO_mode_t mimo_mode, unsigned short nb_rb, + unsigned short mmse_flag, unsigned char output_shift0, unsigned char output_shift1); @@ -957,6 +958,7 @@ void dlsch_channel_level_TM34(int **dl_ch_estimates_ext, int *avg_1, uint8_t symbol, unsigned short nb_rb, + unsigned int mmse_flag, MIMO_mode_t mimo_mode);