From e4a7eda96878e2e43c90926619f3333429cbdc0c Mon Sep 17 00:00:00 2001
From: yihz <yihongzheng@foxmail.com>
Date: Tue, 7 Dec 2021 10:43:28 +0800
Subject: [PATCH] base on 2021.w48, diff_1206_2.diff, UL_NR_MIMO 4*4

---
 diff_1206_2.txt                               | 4257 +++++++++++++++++
 executables/nr-softmodem-common.h             |    1 +
 executables/nr-softmodem.h                    |    2 +
 executables/nr-ue.c                           |    5 +-
 executables/nr-uesoftmodem.c                  |    7 +-
 .../nfapi/public_inc/fapi_nr_ue_interface.h   |    3 +-
 .../nfapi/public_inc/nfapi_nr_interface_scf.h |    3 +-
 nfapi/open-nFAPI/nfapi/src/nfapi_p7.c         |    6 +-
 openair1/PHY/INIT/nr_init.c                   |   20 +-
 openair1/PHY/INIT/nr_init_ru.c                |    2 +-
 openair1/PHY/INIT/nr_init_ue.c                |    5 +-
 .../NR_ESTIMATION/nr_ul_channel_estimation.c  |    9 +-
 .../PHY/NR_TRANSPORT/nr_transport_proto.h     |   30 +-
 openair1/PHY/NR_TRANSPORT/nr_ulsch.c          |   32 +
 openair1/PHY/NR_TRANSPORT/nr_ulsch.h          |    6 +
 .../PHY/NR_TRANSPORT/nr_ulsch_demodulation.c  | 1365 ++++--
 .../NR_TRANSPORT/nr_ulsch_llr_computation.c   |   28 +-
 openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c    |    6 +-
 .../PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c     |    9 +
 openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c    |  367 +-
 openair1/PHY/defs_gNB.h                       |    6 +-
 openair1/PHY/defs_nr_UE.h                     |    3 +
 openair1/PHY/defs_nr_common.h                 |    8 +-
 openair1/SCHED_NR/nr_ru_procedures.c          |    3 +-
 openair1/SCHED_NR/phy_procedures_nr_gNB.c     |   20 +-
 openair1/SCHED_NR_UE/fapi_nr_ue_l1.c          |    4 +-
 openair1/SCHED_NR_UE/phy_procedures_nr_ue.c   |    9 +-
 openair1/SIMULATION/NR_PHY/dlsim.c            |    2 +-
 openair1/SIMULATION/NR_PHY/ulschsim.c         |   32 +-
 openair1/SIMULATION/NR_PHY/ulsim.c            |  271 +-
 openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c |    3 +
 openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c   |  109 +-
 openair2/LAYER2/NR_MAC_gNB/config.c           |    2 +-
 openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c |    4 +-
 .../LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c   |    3 +-
 .../LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c |   14 +-
 .../NR_MAC_gNB/gNB_scheduler_primitives.c     |   28 +-
 .../LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c   |   34 +-
 openair2/LAYER2/NR_MAC_gNB/mac_proto.h        |    1 +
 openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h       |    3 +-
 openair2/RRC/NR/MESSAGES/asn1_msg.c           |    1 +
 openair2/RRC/NR/nr_rrc_proto.h                |    2 +
 openair2/RRC/NR/rrc_gNB_nsa.c                 |    4 +-
 openair2/RRC/NR/rrc_gNB_reconfig.c            |   13 +-
 targets/ARCH/rfsimulator/simulator.c          |   17 +-
 ....sa.band78.fr1.106PRB.usrpb210.4layer.conf |  320 ++
 46 files changed, 6450 insertions(+), 629 deletions(-)
 create mode 100644 diff_1206_2.txt
 create mode 100644 targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb.sa.band78.fr1.106PRB.usrpb210.4layer.conf

diff --git a/diff_1206_2.txt b/diff_1206_2.txt
new file mode 100644
index 00000000000..ff352f4157f
--- /dev/null
+++ b/diff_1206_2.txt
@@ -0,0 +1,4257 @@
+diff --git a/executables/nr-softmodem-common.h b/executables/nr-softmodem-common.h
+index c02155326b..1238b43c27 100644
+--- a/executables/nr-softmodem-common.h
++++ b/executables/nr-softmodem-common.h
+@@ -67,6 +67,7 @@
+ #define CONFIG_HLP_ITTIL         "Generate ITTI analyzser logs (similar to wireshark logs but with more details)\n"
+ #define CONFIG_HLP_DLMCS_PHYTEST "Set the downlink MCS for PHYTEST mode\n"
+ #define CONFIG_HLP_DLNL_PHYTEST "Set the downlink nrOfLayers for PHYTEST mode\n"
++#define CONFIG_HLP_ULNL_PHYTEST "Set the uplink nrOfLayers for PHYTEST mode\n"
+ #define CONFIG_HLP_STMON         "Enable processing timing measurement of lte softmodem on per subframe basis \n"
+ #define CONFIG_HLP_MSLOTS        "Skip the missed slots/subframes \n"
+ #define CONFIG_HLP_ULMCS_PHYTEST "Set the uplink MCS for PHYTEST mode\n"
+diff --git a/executables/nr-softmodem.h b/executables/nr-softmodem.h
+index 896c363cbf..a11572c0e1 100644
+--- a/executables/nr-softmodem.h
++++ b/executables/nr-softmodem.h
+@@ -23,6 +23,7 @@
+     {"E" ,                    CONFIG_HLP_TQFS,        PARAMFLAG_BOOL,   i8ptr:&threequarter_fs,             defintval:0,                   TYPE_INT8,   0},        \
+     {"m" ,                    CONFIG_HLP_DLMCS_PHYTEST,0,               uptr:&target_dl_mcs,                defintval:0,                   TYPE_UINT,   0},        \
+     {"l" ,                    CONFIG_HLP_DLNL_PHYTEST,0,                uptr:&target_dl_Nl,                 defintval:0,                   TYPE_UINT,   0},        \
++    {"L" ,                    CONFIG_HLP_ULNL_PHYTEST,0,                uptr:&target_ul_Nl,                 defintval:0,                   TYPE_UINT,   0},        \
+     {"t" ,                    CONFIG_HLP_ULMCS_PHYTEST,0,               uptr:&target_ul_mcs,                defintval:0,                   TYPE_UINT,   0},        \
+     {"M" ,                    CONFIG_HLP_DLBW_PHYTEST,0,                uptr:&target_dl_bw,                 defintval:0,                   TYPE_UINT,   0},        \
+     {"T" ,                    CONFIG_HLP_ULBW_PHYTEST,0,                uptr:&target_ul_bw,                 defintval:0,                   TYPE_UINT,   0},        \
+@@ -36,6 +37,7 @@
+ extern threads_t threads;
+ extern uint32_t target_dl_mcs;
+ extern uint32_t target_dl_Nl;
++extern uint32_t target_ul_Nl;
+ extern uint32_t target_ul_mcs;
+ extern uint32_t target_dl_bw;
+ extern uint32_t target_ul_bw;
+diff --git a/executables/nr-ue.c b/executables/nr-ue.c
+index 60ff40abeb..a23f91b1dd 100644
+--- a/executables/nr-ue.c
++++ b/executables/nr-ue.c
+@@ -278,7 +278,7 @@ void processSlotTX(void *arg) {
+   int tx_slot_type = nr_ue_slot_select(cfg, proc->frame_tx, proc->nr_slot_tx);
+   uint8_t gNB_id = 0;
+ 
+-  LOG_D(PHY,"%d.%d => slot type %d\n",proc->frame_tx,proc->nr_slot_tx,tx_slot_type);
++  LOG_D(PHY,"processSlotTX %d.%d => slot type %d\n",proc->frame_tx,proc->nr_slot_tx,tx_slot_type);
+   if (tx_slot_type == NR_UPLINK_SLOT || tx_slot_type == NR_MIXED_SLOT){
+ 
+     // trigger L2 to run ue_scheduler thru IF module
+@@ -372,7 +372,8 @@ void processSlotRX(void *arg) {
+       LOG_D(PHY, "Sending Uplink data \n");
+       nr_ue_pusch_common_procedures(UE,
+                                     proc->nr_slot_tx,
+-                                    &UE->frame_parms,1);
++                                    &UE->frame_parms,
++                                    UE->frame_parms.nb_antennas_tx);
+     }
+ 
+     if (UE->UE_mode[gNB_id] > NOT_SYNCHED && UE->UE_mode[gNB_id] < PUSCH) {
+diff --git a/executables/nr-uesoftmodem.c b/executables/nr-uesoftmodem.c
+index ad5ea982c9..57fd284601 100644
+--- a/executables/nr-uesoftmodem.c
++++ b/executables/nr-uesoftmodem.c
+@@ -465,8 +465,11 @@ int main( int argc, char **argv ) {
+     memset(UE[CC_id],0,sizeof(PHY_VARS_NR_UE));
+ 
+     set_options(CC_id, UE[CC_id]);
+-    NR_UE_MAC_INST_t *mac = get_mac_inst(0);
+-
++    NR_DL_FRAME_PARMS *fp = &UE[CC_id]->frame_parms;
++    NR_UE_MAC_INST_t *mac = get_mac_inst(0);  
++    
++	mac->phy_config.config_req.carrier_config.num_tx_ant = fp->nb_antennas_tx;
++	
+     if (get_softmodem_params()->sa) {
+       uint16_t nr_band = get_band(downlink_frequency[CC_id][0],uplink_frequency_offset[CC_id][0]);
+       mac->nr_band = nr_band;
+diff --git a/nfapi/open-nFAPI/nfapi/public_inc/fapi_nr_ue_interface.h b/nfapi/open-nFAPI/nfapi/public_inc/fapi_nr_ue_interface.h
+index 0e923931ad..8b93417cb0 100644
+--- a/nfapi/open-nFAPI/nfapi/public_inc/fapi_nr_ue_interface.h
++++ b/nfapi/open-nFAPI/nfapi/public_inc/fapi_nr_ue_interface.h
+@@ -298,9 +298,10 @@ typedef struct
+   uint8_t  qam_mod_order;
+   uint8_t  mcs_index;
+   uint8_t  mcs_table;
+-  uint8_t  transform_precoding;
++  uint8_t  transformPrecoder;
+   uint16_t data_scrambling_id;
+   uint8_t  nrOfLayers;
++  uint8_t  Tpmi;
+   //DMRS
+   uint16_t  ul_dmrs_symb_pos;
+   uint8_t  dmrs_config_type;
+diff --git a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
+index 9d245446c8..58ff9037ce 100644
+--- a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
++++ b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
+@@ -1208,9 +1208,10 @@ typedef struct
+   uint8_t  qam_mod_order;
+   uint8_t  mcs_index;
+   uint8_t  mcs_table;
+-  uint8_t  transform_precoding;
++  uint8_t  transformPrecoder;
+   uint16_t data_scrambling_id;
+   uint8_t  nrOfLayers;
++  uint8_t  Tpmi;
+   //DMRS
+   uint16_t  ul_dmrs_symb_pos;
+   uint8_t  dmrs_config_type;
+diff --git a/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c b/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
+index 7cb74f8562..473b0900bc 100644
+--- a/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
++++ b/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
+@@ -941,9 +941,10 @@ static uint8_t pack_ul_tti_request_pusch_pdu(nfapi_nr_pusch_pdu_t *pusch_pdu, ui
+         push8(pusch_pdu->qam_mod_order, ppWritePackedMsg, end) &&
+         push8(pusch_pdu->mcs_index, ppWritePackedMsg, end) &&
+         push8(pusch_pdu->mcs_table, ppWritePackedMsg, end) &&
+-        push8(pusch_pdu->transform_precoding, ppWritePackedMsg, end) &&
++        push8(pusch_pdu->transformPrecoder, ppWritePackedMsg, end) &&
+         push16(pusch_pdu->data_scrambling_id, ppWritePackedMsg, end) &&
+         push8(pusch_pdu->nrOfLayers, ppWritePackedMsg, end) &&
++        push8(pusch_pdu->Tpmi, ppWritePackedMsg, end) &&
+         push16(pusch_pdu->ul_dmrs_symb_pos, ppWritePackedMsg, end) &&
+         push8(pusch_pdu->dmrs_config_type, ppWritePackedMsg, end) &&
+         push16(pusch_pdu->ul_dmrs_scrambling_id, ppWritePackedMsg, end) &&
+@@ -4445,9 +4446,10 @@ static uint8_t unpack_ul_tti_request_pusch_pdu(void *tlv, uint8_t **ppReadPacked
+         pull8(ppReadPackedMsg, &pusch_pdu->qam_mod_order,  end) &&
+         pull8(ppReadPackedMsg, &pusch_pdu->mcs_index,  end) &&
+         pull8(ppReadPackedMsg, &pusch_pdu->mcs_table, end) &&
+-        pull8(ppReadPackedMsg, &pusch_pdu->transform_precoding, end) &&
++        pull8(ppReadPackedMsg, &pusch_pdu->transformPrecoder, end) &&
+         pull16(ppReadPackedMsg, &pusch_pdu->data_scrambling_id, end) &&
+         pull8(ppReadPackedMsg, &pusch_pdu->nrOfLayers, end) &&
++        pull8(ppReadPackedMsg, &pusch_pdu->Tpmi, end) &&
+         pull16(ppReadPackedMsg, &pusch_pdu->ul_dmrs_symb_pos, end) &&
+         pull8(ppReadPackedMsg, &pusch_pdu->dmrs_config_type, end) &&
+         pull16(ppReadPackedMsg, &pusch_pdu->ul_dmrs_scrambling_id, end) &&
+diff --git a/openair1/PHY/INIT/nr_init.c b/openair1/PHY/INIT/nr_init.c
+index cd4848ea8c..2ac3230215 100644
+--- a/openair1/PHY/INIT/nr_init.c
++++ b/openair1/PHY/INIT/nr_init.c
+@@ -276,11 +276,21 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB,
+     pusch_vars[ULSCH_id]->ul_ch_magb0           = (int32_t **)malloc16(n_buf*sizeof(int32_t *) );
+     pusch_vars[ULSCH_id]->ul_ch_mag             = (int32_t **)malloc16(n_buf*sizeof(int32_t *) );
+     pusch_vars[ULSCH_id]->ul_ch_magb            = (int32_t **)malloc16(n_buf*sizeof(int32_t *) );
+-    pusch_vars[ULSCH_id]->rho                   = (int32_t **)malloc16_clear(n_buf*sizeof(int32_t*) );
++    pusch_vars[ULSCH_id]->rho                   = (int32_t ***)malloc16_clear(Prx*sizeof(int32_t**) );
++    pusch_vars[ULSCH_id]->llr_layers            = (int16_t **)malloc16(max_ul_mimo_layers*sizeof(int32_t *) );
+ 
+     for (i=0; i<Prx; i++) {
+       pusch_vars[ULSCH_id]->rxdataF_ext[i]           = (int32_t *)malloc16_clear( sizeof(int32_t)*N_RB_UL*12*fp->symbols_per_slot );
+       pusch_vars[ULSCH_id]->rxdataF_ext2[i]          = (int32_t *)malloc16_clear( sizeof(int32_t)*N_RB_UL*12*fp->symbols_per_slot );
++      pusch_vars[ULSCH_id]->rho[i]                   = (int32_t **)malloc16_clear( NR_MAX_NB_LAYERS*NR_MAX_NB_LAYERS*sizeof(int32_t*));
++
++      for (int j=0; j< max_ul_mimo_layers; j++) 
++      {
++        for (int k=0; k<max_ul_mimo_layers; k++) 
++        {
++          pusch_vars[ULSCH_id]->rho[i][j*max_ul_mimo_layers+k]=(int32_t *)malloc16_clear( sizeof(int32_t) * fp->N_RB_UL*12*7*2 );
++        }
++      }
+     }
+     for (i=0; i<n_buf; i++) {
+       pusch_vars[ULSCH_id]->ul_ch_estimates[i]       = (int32_t *)malloc16_clear( sizeof(int32_t)*fp->ofdm_symbol_size*2*fp->symbols_per_slot );
+@@ -293,8 +303,12 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB,
+       pusch_vars[ULSCH_id]->ul_ch_mag0[i]            = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );
+       pusch_vars[ULSCH_id]->ul_ch_magb0[i]           = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );
+       pusch_vars[ULSCH_id]->ul_ch_mag[i]             = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );
+-      pusch_vars[ULSCH_id]->ul_ch_magb[i]            = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );
+-      pusch_vars[ULSCH_id]->rho[i]                   = (int32_t *)malloc16_clear( sizeof(int32_t)*(fp->N_RB_UL*12*7*2) );
++      pusch_vars[ULSCH_id]->ul_ch_magb[i]            = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );      
++    }
++
++    for (i=0; i< max_ul_mimo_layers; i++) 
++    {
++      pusch_vars[ULSCH_id]->llr_layers[i] = (int16_t *)malloc16_clear( (8*((3*8*6144)+12))*sizeof(int16_t) ); // [hna] 6144 is LTE and (8*((3*8*6144)+12)) is not clear
+     }
+     pusch_vars[ULSCH_id]->llr = (int16_t *)malloc16_clear( (8*((3*8*6144)+12))*sizeof(int16_t) ); // [hna] 6144 is LTE and (8*((3*8*6144)+12)) is not clear
+     pusch_vars[ULSCH_id]->ul_valid_re_per_slot  = (int16_t *)malloc16_clear( sizeof(int16_t)*fp->symbols_per_slot);
+diff --git a/openair1/PHY/INIT/nr_init_ru.c b/openair1/PHY/INIT/nr_init_ru.c
+index 4024ed1617..169db48e69 100644
+--- a/openair1/PHY/INIT/nr_init_ru.c
++++ b/openair1/PHY/INIT/nr_init_ru.c
+@@ -41,7 +41,7 @@ int nr_phy_init_RU(RU_t *ru) {
+   int p;
+   int re;
+ 
+-  LOG_I(PHY,"Initializing RU signal buffers (if_south %s) nb_tx %d\n",ru_if_types[ru->if_south],ru->nb_tx);
++  LOG_I(PHY,"Initializing RU signal buffers (if_south %s) nb_tx %d, nb_rx %d\n",ru_if_types[ru->if_south],ru->nb_tx, ru->nb_rx);
+ 
+   nfapi_nr_config_request_scf_t *cfg;
+   ru->nb_log_antennas=0;
+diff --git a/openair1/PHY/INIT/nr_init_ue.c b/openair1/PHY/INIT/nr_init_ue.c
+index b60e77e814..11fb6e7d61 100644
+--- a/openair1/PHY/INIT/nr_init_ue.c
++++ b/openair1/PHY/INIT/nr_init_ue.c
+@@ -124,7 +124,8 @@ void phy_init_nr_ue_PUSCH(NR_UE_PUSCH *const pusch,
+   AssertFatal( pusch, "pusch==0" );
+ 
+   for (int i=0; i<NR_MAX_NB_LAYERS; i++) {
+-    pusch->txdataF_layers[i] = (int32_t *)malloc16_clear(NR_MAX_PUSCH_ENCODED_LENGTH*sizeof(int32_t));
++    pusch->txdataF_layers[i] = (int32_t *)malloc16_clear((NR_MAX_PUSCH_ENCODED_LENGTH)*sizeof(int32_t *));
++    pusch->txdataF_precoding[i] = (int32_t *)malloc16_clear((NR_MAX_PUSCH_ENCODED_LENGTH)*sizeof(int32_t *));
+   }
+ }
+ 
+@@ -143,7 +144,7 @@ int init_nr_ue_signal(PHY_VARS_NR_UE *ue,
+   uint16_t N_n_scid[2] = {0,1}; // [HOTFIX] This is a temporary implementation of scramblingID0 and scramblingID1 which are given by DMRS-UplinkConfig
+   int n_scid;
+   abstraction_flag = 0;
+-  LOG_I(PHY, "Initializing UE vars (abstraction %u) for gNB TXant %u, UE RXant %u\n", abstraction_flag, fp->nb_antennas_tx, fp->nb_antennas_rx);
++  LOG_I(PHY, "Initializing UE vars (abstraction %u) for UE TXant %u, UE RXant %u\n", abstraction_flag, fp->nb_antennas_tx, fp->nb_antennas_rx);
+   //LOG_D(PHY,"[MSC_NEW][FRAME 00000][PHY_UE][MOD %02u][]\n", ue->Mod_id+NB_gNB_INST);
+   phy_init_nr_top(ue);
+   // many memory allocation sizes are hard coded
+diff --git a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
+index 1f164748bf..9a432e8615 100644
+--- a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
++++ b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
+@@ -110,7 +110,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
+          fr = filt8_r1;
+          fmm = filt8_mm1;
+          fml = filt8_ml1;
+-         fmr = filt8_m1;
++         fmr = filt8_mm1;
+          fdcl = filt8_dcl1;
+          fdcr = filt8_dcr1;
+          fdclh = filt8_dcl1_h;
+@@ -129,9 +129,8 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
+ 
+   //------------------generate DMRS------------------//
+ 
+-  // transform precoding = 1 means disabled
+-  if (pusch_pdu->transform_precoding == 1) {
+-    nr_pusch_dmrs_rx(gNB, Ns, gNB->nr_gold_pusch_dmrs[pusch_pdu->scid][Ns][symbol], &pilot[0], 1000, 0, nb_rb_pusch,
++  if (pusch_pdu->transformPrecoder == transformPrecoder_disabled) {
++    nr_pusch_dmrs_rx(gNB, Ns, gNB->nr_gold_pusch_dmrs[pusch_pdu->scid][Ns][symbol], &pilot[0], (1000+p), 0, nb_rb_pusch,
+                      (pusch_pdu->bwp_start + pusch_pdu->rb_start)*NR_NB_SC_PER_RB, pusch_pdu->dmrs_config_type);
+   }
+   else {  // if transform precoding or SC-FDMA is enabled in Uplink
+@@ -291,7 +290,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
+         printf("pilot %u : rxF - > (%d,%d) (%d) ch -> (%d,%d) (%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],dBc(rxF[0],rxF[1]),ch[0],ch[1],dBc(ch[0],ch[1]),pil[0],pil[1]);
+ 	printf("data %u : rxF - > (%d,%d) (%d)\n",pilot_cnt,rxF[2],rxF[3],dBc(rxF[2],rxF[3]));
+   #endif
+-        multadd_real_vector_complex_scalar(fml,
++        multadd_real_vector_complex_scalar(fm,
+                                            ch,
+                                            ul_ch,
+                                            8);
+diff --git a/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h b/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
+index 4fed3a468f..87e4d57362 100644
+--- a/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
++++ b/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
+@@ -137,21 +137,22 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+     @param nb_rb_pusch The number of RBs allocated (used for Resource Allocation Type 1 in NR)
+     @param frame_parms, Pointer to frame descriptor structure
+ */
+-void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
+-                                 NR_gNB_PUSCH *pusch_vars,
+-                                 int slot,
+-                                 unsigned char symbol,
+-                                 uint8_t is_dmrs_symbol,
+-                                 nfapi_nr_pusch_pdu_t *pusch_pdu,
+-                                 NR_DL_FRAME_PARMS *frame_parms);
++void nr_ulsch_extract_rbs(int32_t **rxdataF,
++                          NR_gNB_PUSCH *pusch_vars,
++                          int slot,
++                          unsigned char symbol,
++                          uint8_t is_dmrs_symbol,
++                          nfapi_nr_pusch_pdu_t *pusch_pdu,
++                          NR_DL_FRAME_PARMS *frame_parms);
+ 
+ void nr_ulsch_scale_channel(int32_t **ul_ch_estimates_ext,
+                             NR_DL_FRAME_PARMS *frame_parms,
+                             NR_gNB_ULSCH_t **ulsch_gNB,
+-                            uint8_t symbol,
+-                            uint8_t start_symbol,
+-                            uint16_t nb_rb,
+-                            pusch_dmrs_type_t pusch_dmrs_type);
++                            uint8_t symbol, 
++                            uint8_t is_dmrs_symbol,                           
++                            uint32_t len,
++                            uint8_t nrOfLayers,
++                            uint16_t nb_rb);
+ 
+ /** \brief This function computes the average channel level over all allocated RBs and antennas (TX/RX) in order to compute output shift for compensated signal
+     @param ul_ch_estimates_ext Channel estimates in allocated RBs
+@@ -185,9 +186,10 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
+                                 int **ul_ch_mag,
+                                 int **ul_ch_magb,
+                                 int **rxdataF_comp,
+-                                int **rho,
++                                int ***rho,
+                                 NR_DL_FRAME_PARMS *frame_parms,
+                                 unsigned char symbol,
++                                int length,
+                                 uint8_t is_dmrs_symbol,
+                                 unsigned char mod_order,
+                                 uint8_t  nrOfLayers,
+@@ -255,8 +257,8 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
+     @param mod_order modulation order
+ */
+ void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
+-                          int32_t **ul_ch_mag,
+-                          int32_t **ul_ch_magb,
++                          int32_t *ul_ch_mag,
++                          int32_t *ul_ch_magb,
+                           int16_t  *ulsch_llr,
+                           uint32_t nb_rb,
+                           uint32_t nb_re,
+diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch.c
+index 4278338dbc..3ea19a9970 100644
+--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch.c
++++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch.c
+@@ -137,6 +137,38 @@ void nr_ulsch_unscrambling_optim(int16_t* llr,
+ #endif
+ }
+ 
++void nr_ulsch_layer_demapping(int16_t *llr_cw,
++				     uint8_t Nl,
++				     uint8_t mod_order,
++				     uint32_t length,
++				     int16_t **llr_layers) 
++{
++
++  switch (Nl) {
++
++    case 1:
++      memcpy((void*)llr_cw, (void*)llr_layers[0], (length)*sizeof(int16_t));
++    break;
++
++    case 2:
++    case 3:
++    case 4:
++      for (int i=0; i<(length/Nl/mod_order); i++)
++      {
++        for (int l=0; l<Nl; l++) 
++        {
++        	for (int m=0; m<mod_order; m++){
++        		llr_cw[i*Nl*mod_order+l*mod_order+m] = llr_layers[l][i*mod_order+m];
++        	}
++        }
++  	  }
++    break;
++
++  default:
++  AssertFatal(0, "Not supported number of layers %d\n", Nl);
++  }
++}
++
+ void dump_pusch_stats(FILE *fd,PHY_VARS_gNB *gNB) {
+ 
+   for (int i=0;i<gNB->number_of_nr_ulsch_max;i++) {
+diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch.h b/openair1/PHY/NR_TRANSPORT/nr_ulsch.h
+index 80c10d74b1..0e660351ae 100644
+--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch.h
++++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch.h
+@@ -82,6 +82,12 @@ void nr_ulsch_unscrambling_optim(int16_t* llr,
+ 				 uint32_t Nid,
+ 				 uint32_t n_RNTI);
+ 
++void nr_ulsch_layer_demapping(int16_t *llr_cw,
++				     uint8_t Nl,
++				     uint8_t mod_order,
++				     uint32_t length,
++				     int16_t **llr_layers); 
++
+ void nr_ulsch_procedures(PHY_VARS_gNB *gNB,
+                          int frame_rx,
+                          int slot_rx,
+diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
+index 28dd5ab219..8314c83ad9 100644
+--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
++++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
+@@ -302,26 +302,27 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
+ }
+ 
+ 
+-void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
+-                                 NR_gNB_PUSCH *pusch_vars,
+-                                 int slot,
+-                                 unsigned char symbol,
+-                                 uint8_t is_dmrs_symbol,
+-                                 nfapi_nr_pusch_pdu_t *pusch_pdu,
+-                                 NR_DL_FRAME_PARMS *frame_parms)
++void nr_ulsch_extract_rbs(int32_t **rxdataF,
++                          NR_gNB_PUSCH *pusch_vars,
++                          int slot,
++                          unsigned char symbol,
++                          uint8_t is_dmrs_symbol,
++                          nfapi_nr_pusch_pdu_t *pusch_pdu,
++                          NR_DL_FRAME_PARMS *frame_parms)
+ {
+ 
+   unsigned short start_re, re, nb_re_pusch;
+-  unsigned char aarx;
++  unsigned char aarx, aatx;
+   uint32_t rxF_ext_index = 0;
+   uint32_t ul_ch0_ext_index = 0;
+   uint32_t ul_ch0_index = 0;
+-  uint8_t k_prime;
+-  uint16_t n;
++  //uint8_t k_prime;
++  //uint16_t n;
+   int16_t *rxF,*rxF_ext;
+   int *ul_ch0,*ul_ch0_ext;
+-  uint8_t delta = 0;
++  //uint8_t delta = 0;
+   int soffset = (slot&3)*frame_parms->symbols_per_slot*frame_parms->ofdm_symbol_size;
++
+ #ifdef DEBUG_RB_EXT
+ 
+   printf("--------------------symbol = %d-----------------------\n", symbol);
+@@ -329,7 +330,7 @@ void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
+ 
+ #endif
+ 
+-  uint8_t is_dmrs_re;
++  uint8_t is_data_re;
+   start_re = (frame_parms->first_carrier_offset + (pusch_pdu->rb_start + pusch_pdu->bwp_start) * NR_NB_SC_PER_RB)%frame_parms->ofdm_symbol_size;
+   nb_re_pusch = NR_NB_SC_PER_RB * pusch_pdu->rb_size;
+ #ifdef __AVX2__
+@@ -338,64 +339,95 @@ void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
+   int nb_re_pusch2 = nb_re_pusch;
+ #endif
+ 
+-  for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
++  for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) 
++  {
+ 
+     rxF       = (int16_t *)&rxdataF[aarx][soffset+(symbol * frame_parms->ofdm_symbol_size)];
+     rxF_ext   = (int16_t *)&pusch_vars->rxdataF_ext[aarx][symbol * nb_re_pusch2]; // [hna] rxdataF_ext isn't contiguous in order to solve an alignment problem ib llr computation in case of mod_order = 4, 6
+-
+-    ul_ch0     = &pusch_vars->ul_ch_estimates[aarx][pusch_vars->dmrs_symbol*frame_parms->ofdm_symbol_size]; // update channel estimates if new dmrs symbol are available
+-
+-    ul_ch0_ext = &pusch_vars->ul_ch_estimates_ext[aarx][symbol*nb_re_pusch2];
+-
+-    n = 0;
+-    k_prime = 0;
+-    rxF_ext_index = 0;
+-    ul_ch0_ext_index = 0;
+-    ul_ch0_index = 0;
+-
+-    if (is_dmrs_symbol == 0) {
++    
++    if (is_dmrs_symbol == 0) 
++    {
+       //
+       //rxF[ ((start_re + re)*2)      % (frame_parms->ofdm_symbol_size*2)]);
+-      if (start_re + nb_re_pusch < frame_parms->ofdm_symbol_size) {
++      if (start_re + nb_re_pusch < frame_parms->ofdm_symbol_size) 
++      {
+         memcpy1((void*)rxF_ext,
+                 (void*)&rxF[start_re*2],
+                 nb_re_pusch*sizeof(int32_t));
+-      } else {
+-	int neg_length = frame_parms->ofdm_symbol_size-start_re;
+-	int pos_length = nb_re_pusch-neg_length;
++      } 
++      else 
++      {
++        int neg_length = frame_parms->ofdm_symbol_size-start_re;
++        int pos_length = nb_re_pusch-neg_length;
++        memcpy1((void*)rxF_ext,(void*)&rxF[start_re*2],neg_length*sizeof(int32_t));
++        memcpy1((void*)&rxF_ext[2*neg_length],(void*)rxF,pos_length*sizeof(int32_t));
++      }
+ 
+-	memcpy1((void*)rxF_ext,(void*)&rxF[start_re*2],neg_length*sizeof(int32_t));
+-	memcpy1((void*)&rxF_ext[2*neg_length],(void*)rxF,pos_length*sizeof(int32_t));
++      #ifdef SUPPORT_PMI_MATRIC  
++      for (aatx = 0; aatx < pusch_pdu->nrOfLayers; aatx++)
++      #else
++      aatx = aarx;
++      #endif
++      {
++        ul_ch0     = &pusch_vars->ul_ch_estimates[aatx*frame_parms->nb_antennas_rx+aarx][pusch_vars->dmrs_symbol*frame_parms->ofdm_symbol_size]; // update channel estimates if new dmrs symbol are available
++        ul_ch0_ext = &pusch_vars->ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*nb_re_pusch2];
++  
++        memcpy1((void*)ul_ch0_ext,(void*)ul_ch0,nb_re_pusch*sizeof(int32_t));
+       }
+-      memcpy1((void*)ul_ch0_ext,(void*)ul_ch0,nb_re_pusch*sizeof(int32_t));
+     }
+-    else {
+-      for (re = 0; re < nb_re_pusch; re++) {
+-
+-        is_dmrs_re = (re == get_dmrs_freq_idx_ul(n, k_prime, delta, pusch_pdu->dmrs_config_type));
+-
+-#ifdef DEBUG_RB_EXT
+-        printf("re = %d, kprime %d, n %d, is_dmrs_symbol = %d, symbol = %d\n", re, k_prime, n, is_dmrs_symbol, symbol);
+-#endif
+-
+-        /* save only data and respective channel estimates */
+-        if (is_dmrs_re == 0) {
+-          rxF_ext[rxF_ext_index]     = (rxF[ ((start_re + re)*2)      % (frame_parms->ofdm_symbol_size*2)]);
+-          rxF_ext[rxF_ext_index + 1] = (rxF[(((start_re + re)*2) + 1) % (frame_parms->ofdm_symbol_size*2)]);
+-          ul_ch0_ext[ul_ch0_ext_index] = ul_ch0[ul_ch0_index];
++    else 
++    { 
++      #ifdef SUPPORT_PMI_MATRIC  
++      for (aatx = 0; aatx < pusch_pdu->nrOfLayers; aatx++)
++      #else
++      aatx = aarx;
++      #endif      
++      {
++        ul_ch0     = &pusch_vars->ul_ch_estimates[aatx*frame_parms->nb_antennas_rx+aarx][pusch_vars->dmrs_symbol*frame_parms->ofdm_symbol_size]; // update channel estimates if new dmrs symbol are available
++        ul_ch0_ext = &pusch_vars->ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*nb_re_pusch2];
++
++        //n = 0;
++        //k_prime = 0;
++        rxF_ext_index = 0;
++        ul_ch0_ext_index = 0;
++        ul_ch0_index = 0;        
++        
++        for (re = 0; re < nb_re_pusch; re++) 
++        {  
++          uint16_t k= start_re+re;
++                
++          is_data_re = allowed_xlsch_re_in_dmrs_symbol(k, start_re, frame_parms->ofdm_symbol_size, pusch_pdu->num_dmrs_cdm_grps_no_data, pusch_pdu->dmrs_config_type);
++          
++          if (++k >= frame_parms->ofdm_symbol_size)
++          {
++            k -= frame_parms->ofdm_symbol_size;
++          }
+ 
+-#ifdef DEBUG_RB_EXT
+-          printf("dmrs symb %d: rxF_ext[%d] = (%d,%d), ul_ch0_ext[%d] = (%d,%d)\n",
++          #ifdef DEBUG_RB_EXT
++          printf("re = %d, kprime %d, n %d, is_dmrs_symbol = %d, symbol = %d\n", re, k_prime, n, is_dmrs_symbol, symbol);
++          #endif
++
++          /* save only data and respective channel estimates */
++          if (is_data_re == 1) 
++          { 
++            if (aatx == aarx)
++            {
++              rxF_ext[rxF_ext_index]     = (rxF[ ((start_re + re)*2)      % (frame_parms->ofdm_symbol_size*2)]);
++              rxF_ext[rxF_ext_index + 1] = (rxF[(((start_re + re)*2) + 1) % (frame_parms->ofdm_symbol_size*2)]);
++              rxF_ext_index +=2;
++            }
++          
++            ul_ch0_ext[ul_ch0_ext_index] = ul_ch0[ul_ch0_index];
++            ul_ch0_ext_index++;
++
++            #ifdef DEBUG_RB_EXT
++            printf("dmrs symb %d: rxF_ext[%d] = (%d,%d), ul_ch0_ext[%d] = (%d,%d)\n",
+                  is_dmrs_symbol,rxF_ext_index>>1, rxF_ext[rxF_ext_index],rxF_ext[rxF_ext_index+1],
+                  ul_ch0_ext_index,  ((int16_t*)&ul_ch0_ext[ul_ch0_ext_index])[0],  ((int16_t*)&ul_ch0_ext[ul_ch0_ext_index])[1]);
+-#endif
+-          ul_ch0_ext_index++;
+-          rxF_ext_index +=2;
+-        } else {
+-          n += k_prime;
+-          k_prime ^= 1;
++            #endif          
++          } 
++          ul_ch0_index++;
+         }
+-        ul_ch0_index++;
+       }
+     }
+   }
+@@ -406,16 +438,19 @@ void nr_ulsch_scale_channel(int **ul_ch_estimates_ext,
+                             NR_gNB_ULSCH_t **ulsch_gNB,
+                             uint8_t symbol,
+                             uint8_t is_dmrs_symbol,
+-                            unsigned short nb_rb,
+-                            pusch_dmrs_type_t pusch_dmrs_type)
++                            uint32_t len,
++                            uint8_t nrOfLayers,
++                            unsigned short nb_rb)
+ {
+ 
+ #if defined(__x86_64__)||defined(__i386__)
+ 
+   short rb, ch_amp;
+-  unsigned char aarx;
++  unsigned char aarx,aatx;
+   __m128i *ul_ch128, ch_amp128;
+ 
++  uint32_t nb_rb_0 = len/12 + ((len%12)?1:0);
++
+   // Determine scaling amplitude based the symbol
+ 
+   ch_amp = 1024*8; //((pilots) ? (ulsch_gNB[0]->sqrt_rho_b) : (ulsch_gNB[0]->sqrt_rho_a));
+@@ -431,35 +466,26 @@ void nr_ulsch_scale_channel(int **ul_ch_estimates_ext,
+   int off = 0;
+ #endif
+ 
+-  for (aarx=0; aarx < frame_parms->nb_antennas_rx; aarx++) {
+-
+-      ul_ch128 = (__m128i *)&ul_ch_estimates_ext[aarx][symbol*(off+(nb_rb*NR_NB_SC_PER_RB))];
+-
+-      if (is_dmrs_symbol==1){
+-        if (pusch_dmrs_type == pusch_dmrs_type1)
+-          nb_rb = nb_rb>>1;
+-        else
+-          nb_rb = (2*nb_rb)/3;
+-      }
+-
+-      for (rb=0;rb<nb_rb;rb++) {
++for (aatx = 0; aatx < nrOfLayers; aatx++)
++{
++  for (aarx=0; aarx < frame_parms->nb_antennas_rx; aarx++) 
++  {
++      ul_ch128 = (__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*NR_NB_SC_PER_RB))];
+ 
++      for (rb=0;rb < nb_rb_0;rb++) 
++      {
+         ul_ch128[0] = _mm_mulhi_epi16(ul_ch128[0], ch_amp128);
+         ul_ch128[0] = _mm_slli_epi16(ul_ch128[0], 3);
+ 
+         ul_ch128[1] = _mm_mulhi_epi16(ul_ch128[1], ch_amp128);
+         ul_ch128[1] = _mm_slli_epi16(ul_ch128[1], 3);
+ 
+-        if (is_dmrs_symbol) {
+-          ul_ch128+=2;
+-        } else {
+-          ul_ch128[2] = _mm_mulhi_epi16(ul_ch128[2], ch_amp128);
+-          ul_ch128[2] = _mm_slli_epi16(ul_ch128[2], 3);
+-          ul_ch128+=3;
+-
+-        }
++        ul_ch128[2] = _mm_mulhi_epi16(ul_ch128[2], ch_amp128);
++        ul_ch128[2] = _mm_slli_epi16(ul_ch128[2], 3);
++        ul_ch128+=3;
+       }
+-    }
++  }
++}
+ #endif
+ }
+ 
+@@ -481,6 +507,8 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
+ 
+   int16_t x = factor2(len);
+   int16_t y = (len)>>x;
++  
++  uint32_t nb_rb_0 = len/12 + ((len%12)?1:0);
+ 
+ #ifdef __AVX2__
+   int off = ((nb_rb&1) == 1)? 4:0;
+@@ -489,13 +517,16 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
+ #endif
+ 
+   for (aatx = 0; aatx < nrOfLayers; aatx++)
+-    for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
++  {
++    for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) 
++    {
+       //clear average level
+       avg128U = _mm_setzero_si128();
+ 
+       ul_ch128=(__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
+ 
+-      for (rb = 0; rb < len/12; rb++) {
++      for (rb = 0; rb < nb_rb_0; rb++) 
++	  {
+         avg128U = _mm_add_epi32(avg128U, _mm_srai_epi32(_mm_madd_epi16(ul_ch128[0], ul_ch128[0]), x));
+         avg128U = _mm_add_epi32(avg128U, _mm_srai_epi32(_mm_madd_epi16(ul_ch128[1], ul_ch128[1]), x));
+         avg128U = _mm_add_epi32(avg128U, _mm_srai_epi32(_mm_madd_epi16(ul_ch128[2], ul_ch128[2]), x));
+@@ -508,7 +539,8 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
+                                                     ((int32_t*)&avg128U)[3]) / y;
+ 
+     }
+-
++  }
++   
+   _mm_empty();
+   _m_empty();
+ 
+@@ -520,7 +552,7 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
+   int16x4_t *ul_ch128;
+ 
+   symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+-
++  uint32_t nb_rb_0 = len/12 + ((len%12)?1:0);
+   for (aatx=0; aatx<nrOfLayers; aatx++) {
+     for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+       //clear average level
+@@ -529,7 +561,7 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
+ 
+       ul_ch128 = (int16x4_t *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*frame_parms->N_RB_UL*12];
+ 
+-      for (rb = 0; rb < nb_rb; rb++) {
++      for (rb = 0; rb < nb_rb_0; rb++) {
+         //  printf("rb %d : ",rb);
+         //  print_shorts("ch",&ul_ch128[0]);
+         avg128U = vqaddq_s32(avg128U, vmull_s16(ul_ch128[0], ul_ch128[0]));
+@@ -568,14 +600,20 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
+ #endif
+ }
+ 
++
++
++//==============================================================================================
++// Pre-processing for LLR computation
++//==============================================================================================
+ void nr_ulsch_channel_compensation(int **rxdataF_ext,
+                                    int **ul_ch_estimates_ext,
+                                    int **ul_ch_mag,
+                                    int **ul_ch_magb,
+                                    int **rxdataF_comp,
+-                                   int **rho,
++                                   int ***rho,
+                                    NR_DL_FRAME_PARMS *frame_parms,
+                                    unsigned char symbol,
++                                   int length,
+                                    uint8_t is_dmrs_symbol,
+                                    unsigned char mod_order,
+                                    uint8_t  nrOfLayers,
+@@ -637,20 +675,24 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
+   __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128={0},QAM_amp128b={0};
+   QAM_amp128b = _mm_setzero_si128();
+ 
+-  for (aatx=0; aatx<nrOfLayers; aatx++) {
+-
+-    if (mod_order == 4) {
++  uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
++  for (aatx=0; aatx<nrOfLayers; aatx++)
++  {
++    if (mod_order == 4) 
++    {
+       QAM_amp128 = _mm_set1_epi16(QAM16_n1);  // 2/sqrt(10)
+       QAM_amp128b = _mm_setzero_si128();
+-    } else if (mod_order == 6) {
++    } 
++    else if (mod_order == 6)
++    {
+       QAM_amp128  = _mm_set1_epi16(QAM64_n1); //
+       QAM_amp128b = _mm_set1_epi16(QAM64_n2);
+     }
+ 
+     //    printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol);
+ 
+-    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+-
++    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) 
++    {
+       ul_ch128          = (__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
+       ul_ch_mag128      = (__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
+       ul_ch_mag128b     = (__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
+@@ -658,8 +700,10 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
+       rxdataF_comp128   = (__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
+ 
+ 
+-      for (rb=0; rb<nb_rb; rb++) {
+-        if (mod_order>2) {
++      for (rb=0; rb<nb_rb_0; rb++) 
++      {
++        if (mod_order>2) 
++        {
+           // get channel amplitude if not QPSK
+ 
+           //print_shorts("ch:",(int16_t*)&ul_ch128[0]);
+@@ -679,6 +723,8 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
+           ul_ch_mag128[0] = _mm_mulhi_epi16(ul_ch_mag128[0],QAM_amp128);
+           ul_ch_mag128[0] = _mm_slli_epi16(ul_ch_mag128[0],1);
+ 
++          ul_ch_mag128b[0] = _mm_mulhi_epi16(ul_ch_mag128b[0],QAM_amp128b);
++          ul_ch_mag128b[0] = _mm_slli_epi16(ul_ch_mag128b[0],1);
+           // print_ints("ch: = ",(int32_t*)&mmtmpD0);
+           // print_shorts("QAM_amp:",(int16_t*)&QAM_amp128);
+           // print_shorts("mag:",(int16_t*)&ul_ch_mag128[0]);
+@@ -687,30 +733,24 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
+           ul_ch_mag128b[1] = ul_ch_mag128[1];
+           ul_ch_mag128[1] = _mm_mulhi_epi16(ul_ch_mag128[1],QAM_amp128);
+           ul_ch_mag128[1] = _mm_slli_epi16(ul_ch_mag128[1],1);
++          
++          ul_ch_mag128b[1] = _mm_mulhi_epi16(ul_ch_mag128b[1],QAM_amp128);
++          ul_ch_mag128b[1] = _mm_slli_epi16(ul_ch_mag128b[1],1);
+ 
+-          if (is_dmrs_symbol==0) {
+-            mmtmpD0 = _mm_madd_epi16(ul_ch128[2],ul_ch128[2]);
+-            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+-            mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0);
+-
+-            ul_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
+-            ul_ch_mag128b[2] = ul_ch_mag128[2];
++          mmtmpD0 = _mm_madd_epi16(ul_ch128[2],ul_ch128[2]);
++          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
++          mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0);
+ 
+-            ul_ch_mag128[2] = _mm_mulhi_epi16(ul_ch_mag128[2],QAM_amp128);
+-            ul_ch_mag128[2] = _mm_slli_epi16(ul_ch_mag128[2],1);
+-          }
++          ul_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
++          ul_ch_mag128b[2] = ul_ch_mag128[2];
+ 
+-          ul_ch_mag128b[0] = _mm_mulhi_epi16(ul_ch_mag128b[0],QAM_amp128b);
+-          ul_ch_mag128b[0] = _mm_slli_epi16(ul_ch_mag128b[0],1);
++          ul_ch_mag128[2] = _mm_mulhi_epi16(ul_ch_mag128[2],QAM_amp128);
++          ul_ch_mag128[2] = _mm_slli_epi16(ul_ch_mag128[2],1);
+ 
+ 
+-          ul_ch_mag128b[1] = _mm_mulhi_epi16(ul_ch_mag128b[1],QAM_amp128b);
+-          ul_ch_mag128b[1] = _mm_slli_epi16(ul_ch_mag128b[1],1);
++          ul_ch_mag128b[2] = _mm_mulhi_epi16(ul_ch_mag128b[2],QAM_amp128b);
++          ul_ch_mag128b[2] = _mm_slli_epi16(ul_ch_mag128b[2],1);
+ 
+-          if (is_dmrs_symbol==0) {
+-            ul_ch_mag128b[2] = _mm_mulhi_epi16(ul_ch_mag128b[2],QAM_amp128b);
+-            ul_ch_mag128b[2] = _mm_slli_epi16(ul_ch_mag128b[2],1);
+-          }
+         }
+ 
+         // multiply by conjugated channel
+@@ -755,117 +795,157 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
+         //  print_shorts("ch:",ul_ch128+1);
+         //  print_shorts("pack:",rxdataF_comp128+1);
+ 
+-        if (is_dmrs_symbol==0) {
+-          // multiply by conjugated channel
+-          mmtmpD0 = _mm_madd_epi16(ul_ch128[2],rxdataF128[2]);
+-          // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+-          mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[2],_MM_SHUFFLE(2,3,0,1));
+-          mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+-          mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+-          mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
+-          // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+-          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+-          mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+-          mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+-          mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+-
+-          rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+-          //  print_shorts("rx:",rxdataF128+2);
+-          //  print_shorts("ch:",ul_ch128+2);
+-          //        print_shorts("pack:",rxdataF_comp128+2);
+-
+-          ul_ch128+=3;
+-          ul_ch_mag128+=3;
+-          ul_ch_mag128b+=3;
+-          rxdataF128+=3;
+-          rxdataF_comp128+=3;
+-        } else { // we have a smaller PUSCH in symbols with pilots so skip last group of 4 REs and increment less
+-          ul_ch128+=2;
+-          ul_ch_mag128+=2;
+-          ul_ch_mag128b+=2;
+-          rxdataF128+=2;
+-          rxdataF_comp128+=2;
+-        }
+-
+-      }
+-    }
+-  }
+-
+-  if (rho) {
+-
+-    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+-      rho128        = (__m128i *)&rho[aarx][symbol*frame_parms->N_RB_UL*12];
+-      ul_ch128      = (__m128i *)&ul_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_UL*12];
+-      ul_ch128_2    = (__m128i *)&ul_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_UL*12];
+-
+-      for (rb=0; rb<nb_rb; rb++) {
+-        // multiply by conjugated channel
+-        mmtmpD0 = _mm_madd_epi16(ul_ch128[0],ul_ch128_2[0]);
+-        //  print_ints("re",&mmtmpD0);
+-
+-        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+-        mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[0],_MM_SHUFFLE(2,3,0,1));
+-        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+-        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+-        //  print_ints("im",&mmtmpD1);
+-        mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[0]);
+-        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+-        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+-        //  print_ints("re(shift)",&mmtmpD0);
+-        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+-        //  print_ints("im(shift)",&mmtmpD1);
+-        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+-        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+-        //        print_ints("c0",&mmtmpD2);
+-        //  print_ints("c1",&mmtmpD3);
+-        rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+-
+-        //print_shorts("rx:",ul_ch128_2);
+-        //print_shorts("ch:",ul_ch128);
+-        //print_shorts("pack:",rho128);
+-
+-        // multiply by conjugated channel
+-        mmtmpD0 = _mm_madd_epi16(ul_ch128[1],ul_ch128_2[1]);
+-        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+-        mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[1],_MM_SHUFFLE(2,3,0,1));
+-        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+-        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+-        mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[1]);
+-        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+-        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+-        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+-        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+-        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+-
+-
+-        rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3);
+-        //print_shorts("rx:",ul_ch128_2+1);
+-        //print_shorts("ch:",ul_ch128+1);
+-        //print_shorts("pack:",rho128+1);
+         // multiply by conjugated channel
+-        mmtmpD0 = _mm_madd_epi16(ul_ch128[2],ul_ch128_2[2]);
++        mmtmpD0 = _mm_madd_epi16(ul_ch128[2],rxdataF128[2]);
+         // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+         mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[2],_MM_SHUFFLE(2,3,0,1));
+         mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+         mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+-        mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[2]);
++        mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
+         // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+         mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+         mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+         mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+         mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+ 
+-        rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+-        //print_shorts("rx:",ul_ch128_2+2);
++        rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
++        //print_shorts("rx:",rxdataF128+2);
+         //print_shorts("ch:",ul_ch128+2);
+-        //print_shorts("pack:",rho128+2);
++        //print_shorts("pack:",rxdataF_comp128+2);
+ 
+         ul_ch128+=3;
+-        ul_ch128_2+=3;
+-        rho128+=3;
+-
++        ul_ch_mag128+=3;
++        ul_ch_mag128b+=3;
++        rxdataF128+=3;
++        rxdataF_comp128+=3;
+       }
++    }
++  }
+ 
++  if (rho) {
++    //we compute the Tx correlation matrix for each Rx antenna
++    //As an example the 2x2 MIMO case requires
++    //rho[aarx][nb_aatx*nb_aatx] = [cov(H_aarx_0,H_aarx_0) cov(H_aarx_0,H_aarx_1)
++    //                              cov(H_aarx_1,H_aarx_0) cov(H_aarx_1,H_aarx_1)], aarx=0,...,nb_antennas_rx-1
++
++    int avg_rho_re[frame_parms->nb_antennas_rx][nrOfLayers*nrOfLayers];
++    int avg_rho_im[frame_parms->nb_antennas_rx][nrOfLayers*nrOfLayers];
++
++    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) 
++    {
++      for (aatx=0; aatx < nrOfLayers; aatx++) 
++      {
++        ul_ch128      = (__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
++      
++        for (int atx=0; atx< nrOfLayers; atx++) 
++        {
++          avg_rho_re[aarx][aatx*nrOfLayers+atx] = 0;
++          avg_rho_im[aarx][aatx*nrOfLayers+atx] = 0;
++          rho128        = (__m128i *)&rho[aarx][aatx*nrOfLayers+atx][symbol*(off+(nb_rb*12))];
++          ul_ch128_2    = (__m128i *)&ul_ch_estimates_ext[atx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
++
++          for (rb=0; rb<nb_rb_0; rb++) 
++          {
++            // multiply by conjugated channel
++            mmtmpD0 = _mm_madd_epi16(ul_ch128[0],ul_ch128_2[0]);
++            //  print_ints("re",&mmtmpD0);
++
++            // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
++            mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[0],_MM_SHUFFLE(2,3,0,1));
++            mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
++            mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
++            //  print_ints("im",&mmtmpD1);
++            mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[0]);
++            // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
++            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
++            //  print_ints("re(shift)",&mmtmpD0);
++            mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
++            //  print_ints("im(shift)",&mmtmpD1);
++            mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
++            mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
++            //        print_ints("c0",&mmtmpD2);
++            //  print_ints("c1",&mmtmpD3);
++            rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
++
++            //print_shorts("rx:",ul_ch128_2);
++            //print_shorts("ch:",ul_ch128);
++            //print_shorts("pack:",rho128);
++
++            avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[0])[0]+
++              ((int16_t*)&rho128[0])[2] +
++              ((int16_t*)&rho128[0])[4] +
++              ((int16_t*)&rho128[0])[6])/16;//
++
++            avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[0])[1]+
++              ((int16_t*)&rho128[0])[3] +
++              ((int16_t*)&rho128[0])[5] +
++              ((int16_t*)&rho128[0])[7])/16;//
++            // multiply by conjugated channel
++            mmtmpD0 = _mm_madd_epi16(ul_ch128[1],ul_ch128_2[1]);
++            // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
++            mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[1],_MM_SHUFFLE(2,3,0,1));
++            mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
++            mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
++            mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[1]);
++            // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
++            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
++            mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
++            mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
++            mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
++            rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3);
++            //print_shorts("rx:",ul_ch128_2+1);
++            //print_shorts("ch:",ul_ch128+1);
++            //print_shorts("pack:",rho128+1);
++
++            // multiply by conjugated channel
++            avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[1])[0]+
++              ((int16_t*)&rho128[1])[2] +
++              ((int16_t*)&rho128[1])[4] +
++              ((int16_t*)&rho128[1])[6])/16;
++
++            avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[1])[1]+
++              ((int16_t*)&rho128[1])[3] +
++              ((int16_t*)&rho128[1])[5] +
++              ((int16_t*)&rho128[1])[7])/16;
++
++            mmtmpD0 = _mm_madd_epi16(ul_ch128[2],ul_ch128_2[2]);
++            // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
++            mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[2],_MM_SHUFFLE(2,3,0,1));
++            mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
++            mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
++            mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[2]);
++            // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
++            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
++            mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
++            mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
++            mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
++
++            rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
++            //print_shorts("rx:",ul_ch128_2+2);
++            //print_shorts("ch:",ul_ch128+2);
++            //print_shorts("pack:",rho128+2);
++            avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[2])[0]+
++              ((int16_t*)&rho128[2])[2] +
++              ((int16_t*)&rho128[2])[4] +
++              ((int16_t*)&rho128[2])[6])/16;
++
++            avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[2])[1]+
++              ((int16_t*)&rho128[2])[3] +
++              ((int16_t*)&rho128[2])[5] +
++              ((int16_t*)&rho128[2])[7])/16;
++
++            ul_ch128+=3;
++            ul_ch128_2+=3;
++            rho128+=3;
++          }
++          if (is_dmrs_symbol==1) {
++            //measurements->rx_correlation[0][0][aarx] = signal_energy(&rho[aarx][aatx*nb_aatx+atx][symbol*nb_rb*12],rb*12);
++            avg_rho_re[aarx][aatx*nrOfLayers+atx] = 16*avg_rho_re[aarx][aatx*nrOfLayers+atx]/(nb_rb*12);
++            avg_rho_im[aarx][aatx*nrOfLayers+atx] = 16*avg_rho_im[aarx][aatx*nrOfLayers+atx]/(nb_rb*12);
++            //printf("rho[rx]%d tx%d tx%d = Re: %d Im: %d\n",aarx, aatx,atx, avg_rho_re[aarx][aatx*nb_aatx+atx], avg_rho_im[aarx][aatx*nb_aatx+atx]);
++          }
++        }
++      }
+     }
+   }
+ 
+@@ -1105,19 +1185,23 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
+ }
+ 
+ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
+-			    int32_t **rxdataF_comp,
+-			    int32_t **ul_ch_mag,
+-			    int32_t **ul_ch_magb,
+-			    uint8_t symbol,
+-			    uint16_t nb_rb) {
++                int32_t **rxdataF_comp,
++                int32_t **ul_ch_mag,
++                int32_t **ul_ch_magb,
++                int32_t ***rho,                
++                uint8_t  nrOfLayers,
++                uint8_t symbol,
++                uint16_t nb_rb,
++                int length) {
+   int n_rx = frame_parms->nb_antennas_rx;
+ #if defined(__x86_64__) || defined(__i386__)
+-  __m128i *rxdataF_comp128[1+n_rx],*ul_ch_mag128[1+n_rx],*ul_ch_mag128b[1+n_rx];
++  __m128i *rxdataF_comp128[2],*ul_ch_mag128[2],*ul_ch_mag128b[2];
+ #elif defined(__arm__)
+   int16x8_t *rxdataF_comp128_0,*ul_ch_mag128_0,*ul_ch_mag128_0b;
+   int16x8_t *rxdataF_comp128_1,*ul_ch_mag128_1,*ul_ch_mag128_1b;
+ #endif
+   int32_t i;
++  uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
+ 
+ #ifdef __AVX2__
+   int off = ((nb_rb&1) == 1)? 4:0;
+@@ -1125,23 +1209,34 @@ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
+   int off = 0;
+ #endif
+ 
+-  if (frame_parms->nb_antennas_rx>1) {
+-#if defined(__x86_64__) || defined(__i386__)
+-    int nb_re = nb_rb*12;
+-    for (int aa=0;aa<frame_parms->nb_antennas_rx;aa++) {
+-      rxdataF_comp128[aa]   = (__m128i *)&rxdataF_comp[aa][(symbol*(nb_re + off))];
+-      ul_ch_mag128[aa]      = (__m128i *)&ul_ch_mag[aa][(symbol*(nb_re + off))];
+-      ul_ch_mag128b[aa]     = (__m128i *)&ul_ch_magb[aa][(symbol*(nb_re + off))];
+-    }
+-    for (int aa=1;aa<frame_parms->nb_antennas_rx;aa++) {      
+-      // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation)
+-      for (i=0; i<nb_rb*3; i++) {
+-	rxdataF_comp128[0][i] = _mm_adds_epi16(rxdataF_comp128[0][i],rxdataF_comp128[aa][i]);
+-	ul_ch_mag128[0][i]    = _mm_adds_epi16(ul_ch_mag128[0][i], ul_ch_mag128[aa][i]);
+-	ul_ch_mag128b[0][i]   = _mm_adds_epi16(ul_ch_mag128b[0][i],ul_ch_mag128b[aa][i]);
++  if (n_rx > 1) 
++  {
++    #if defined(__x86_64__) || defined(__i386__)
++    for (int aatx=0; aatx<nrOfLayers; aatx++)
++    {      
++      int nb_re = nb_rb*12;
++
++      rxdataF_comp128[0]   = (__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
++      ul_ch_mag128[0]      = (__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
++      ul_ch_mag128b[0]     = (__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
++
++      for (int aa=1;aa < n_rx;aa++) 
++      {
++        rxdataF_comp128[1]   = (__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
++        ul_ch_mag128[1]      = (__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
++        ul_ch_mag128b[1]     = (__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
++      
++        // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation)
++        for (i=0; i<nb_rb_0*3; i++) 
++        {
++            rxdataF_comp128[0][i] = _mm_adds_epi16(rxdataF_comp128[0][i],rxdataF_comp128[1][i]);
++            ul_ch_mag128[0][i]    = _mm_adds_epi16(ul_ch_mag128[0][i],ul_ch_mag128[1][i]);
++            ul_ch_mag128b[0][i]   = _mm_adds_epi16(ul_ch_mag128b[0][i],ul_ch_mag128b[1][i]);
++            //rxdataF_comp128[0][i] = _mm_add_epi16(rxdataF_comp128_0[i],(*(__m128i *)&jitterc[0]));
++        }
+       }
+     }
+-#elif defined(__arm__)
++    #elif defined(__arm__)
+     rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
+     rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[1][symbol*frame_parms->N_RB_DL*12];
+     ul_ch_mag128_0      = (int16x8_t *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
+@@ -1156,7 +1251,7 @@ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
+       ul_ch_mag128_0b[i]   = vhaddq_s16(ul_ch_mag128_0b[i],ul_ch_mag128_1b[i]);
+       rxdataF_comp128_0[i] = vqaddq_s16(rxdataF_comp128_0[i],(*(int16x8_t *)&jitterc[0]));
+     }
+-#endif
++    #endif
+   }
+ 
+ #if defined(__x86_64__) || defined(__i386__)
+@@ -1165,6 +1260,696 @@ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
+ #endif
+ }
+ 
++/* Zero Forcing Rx function: nr_det_HhH()
++ *
++ *
++ * */
++void nr_ulsch_det_HhH(int32_t *after_mf_00,//a
++                int32_t *after_mf_01,//b
++                int32_t *after_mf_10,//c
++                int32_t *after_mf_11,//d
++                int32_t *det_fin,//1/ad-bc
++                unsigned short nb_rb,
++                unsigned char symbol,
++                int32_t shift)
++{
++  int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
++  unsigned short rb;
++  __m128i *after_mf_00_128,*after_mf_01_128, *after_mf_10_128, *after_mf_11_128, ad_re_128, bc_re_128; //ad_im_128, bc_im_128;
++  __m128i *det_fin_128, det_re_128; //det_im_128, tmp_det0, tmp_det1;
++
++  after_mf_00_128 = (__m128i *)after_mf_00;
++  after_mf_01_128 = (__m128i *)after_mf_01;
++  after_mf_10_128 = (__m128i *)after_mf_10;
++  after_mf_11_128 = (__m128i *)after_mf_11;
++
++  det_fin_128 = (__m128i *)det_fin;
++
++  for (rb=0; rb<3*nb_rb; rb++) {
++
++    //complex multiplication (I_a+jQ_a)(I_d+jQ_d) = (I_aI_d - Q_aQ_d) + j(Q_aI_d + I_aQ_d)
++    //The imag part is often zero, we compute only the real part
++    ad_re_128 = _mm_sign_epi16(after_mf_00_128[0],*(__m128i*)&nr_conjug2[0]);
++    ad_re_128 = _mm_madd_epi16(ad_re_128,after_mf_11_128[0]); //Re: I_a0*I_d0 - Q_a1*Q_d1
++    //ad_im_128 = _mm_shufflelo_epi16(after_mf_00_128[0],_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
++    //ad_im_128 = _mm_shufflehi_epi16(ad_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
++    //ad_im_128 = _mm_madd_epi16(ad_im_128,after_mf_11_128[0]);//Im: (Q_aI_d + I_aQ_d)
++
++    //complex multiplication (I_b+jQ_b)(I_c+jQ_c) = (I_bI_c - Q_bQ_c) + j(Q_bI_c + I_bQ_c)
++    //The imag part is often zero, we compute only the real part
++    bc_re_128 = _mm_sign_epi16(after_mf_01_128[0],*(__m128i*)&nr_conjug2[0]);
++    bc_re_128 = _mm_madd_epi16(bc_re_128,after_mf_10_128[0]); //Re: I_b0*I_c0 - Q_b1*Q_c1
++    //bc_im_128 = _mm_shufflelo_epi16(after_mf_01_128[0],_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_b0 Q_b1 I_b2 Q_b3]_64bits to [Q_b1 I_b0 Q_b3 I_b2]_64bits
++    //bc_im_128 = _mm_shufflehi_epi16(bc_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_b0 Q_b1 I_b2 Q_b3]_64bits to [Q_b1 I_b0 Q_b3 I_b2]_64bits
++    //bc_im_128 = _mm_madd_epi16(bc_im_128,after_mf_10_128[0]);//Im: (Q_bI_c + I_bQ_c)
++
++    det_re_128 = _mm_sub_epi32(ad_re_128, bc_re_128);
++    //det_im_128 = _mm_sub_epi32(ad_im_128, bc_im_128);
++
++    //det in Q30 format
++    det_fin_128[0] = _mm_abs_epi32(det_re_128);
++
++
++#ifdef DEBUG_DLSCH_DEMOD
++     printf("\n Computing det_HhH_inv \n");
++     //print_ints("det_re_128:",(int32_t*)&det_re_128);
++     //print_ints("det_im_128:",(int32_t*)&det_im_128);
++     print_ints("det_fin_128:",(int32_t*)&det_fin_128[0]);
++#endif
++    det_fin_128+=1;
++    after_mf_00_128+=1;
++    after_mf_01_128+=1;
++    after_mf_10_128+=1;
++    after_mf_11_128+=1;
++  }
++  _mm_empty();
++  _m_empty();
++}
++
++/* Zero Forcing Rx function: nr_inv_comp_muli
++ * Complex number multi: z = x*y
++ *                         = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
++ * */
++__m128i nr_ulsch_inv_comp_muli(__m128i input_x,
++                         __m128i input_y)
++{
++  int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
++
++  __m128i xy_re_128, xy_im_128;
++  __m128i output_z, tmp_z0, tmp_z1;
++
++  // complex multiplication (x_re + jx_im)*(y_re + jy_im) = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
++
++  // the real part
++  xy_re_128 = _mm_sign_epi16(input_x,*(__m128i*)&nr_conjug2[0]);
++  xy_re_128 = _mm_madd_epi16(xy_re_128,input_y); //Re: (x_re*y_re - x_im*y_im)
++
++  // the imag part
++  xy_im_128 = _mm_shufflelo_epi16(input_x,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
++  xy_im_128 = _mm_shufflehi_epi16(xy_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
++  xy_im_128 = _mm_madd_epi16(xy_im_128,input_y);//Im: (x_im*y_re + x_re*y_im)
++
++  //convert back to Q15 before packing
++  xy_re_128 = _mm_srai_epi32(xy_re_128,4);//(2^15/64*2*16)
++  xy_im_128 = _mm_srai_epi32(xy_im_128,4);
++
++  tmp_z0  = _mm_unpacklo_epi32(xy_re_128,xy_im_128);
++  //print_ints("unpack lo:",&tmp_z0[0]);
++  tmp_z1  = _mm_unpackhi_epi32(xy_re_128,xy_im_128);
++  //print_ints("unpack hi:",&tmp_z1[0]);
++  output_z = _mm_packs_epi32(tmp_z0,tmp_z1);
++
++  _mm_empty();
++  _m_empty();
++  return(output_z);
++}
++
++/* Zero Forcing Rx function: nr_conjch0_mult_ch1()
++ *
++ *
++ * */
++void nr_ulsch_conjch0_mult_ch1(int *ch0,
++                         int *ch1,
++                         int32_t *ch0conj_ch1,
++                         unsigned short nb_rb,
++                         unsigned char output_shift0)
++{
++  //This function is used to compute multiplications in H_hermitian * H matrix
++  short nr_conjugate[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1};
++  unsigned short rb;
++  __m128i *dl_ch0_128,*dl_ch1_128, *ch0conj_ch1_128, mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3;
++
++  dl_ch0_128 = (__m128i *)ch0;
++  dl_ch1_128 = (__m128i *)ch1;
++
++  ch0conj_ch1_128 = (__m128i *)ch0conj_ch1;
++
++  for (rb=0; rb<3*nb_rb; rb++) {
++
++    mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch1_128[0]);
++    mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[0],_MM_SHUFFLE(2,3,0,1));
++    mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
++    mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&nr_conjugate[0]);
++    mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch1_128[0]);
++    mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0);
++    mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0);
++    mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
++    mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
++
++    ch0conj_ch1_128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
++
++    /*printf("\n Computing conjugates \n");
++    print_shorts("ch0:",(int16_t*)&dl_ch0_128[0]);
++    print_shorts("ch1:",(int16_t*)&dl_ch1_128[0]);
++    print_shorts("pack:",(int16_t*)&ch0conj_ch1_128[0]);*/
++
++    dl_ch0_128+=1;
++    dl_ch1_128+=1;
++    ch0conj_ch1_128+=1;
++  }
++  _mm_empty();
++  _m_empty();
++}
++__m128i nr_ulsch_comp_muli_sum(__m128i input_x,
++                         __m128i input_y,
++                         __m128i input_w,
++                         __m128i input_z,
++                         __m128i det)
++{
++  int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
++
++  __m128i xy_re_128, xy_im_128, wz_re_128, wz_im_128;
++  __m128i output, tmp_z0, tmp_z1;
++
++  // complex multiplication (x_re + jx_im)*(y_re + jy_im) = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
++  // the real part
++  xy_re_128 = _mm_sign_epi16(input_x,*(__m128i*)&nr_conjug2[0]);
++  xy_re_128 = _mm_madd_epi16(xy_re_128,input_y); //Re: (x_re*y_re - x_im*y_im)
++
++  // the imag part
++  xy_im_128 = _mm_shufflelo_epi16(input_x,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
++  xy_im_128 = _mm_shufflehi_epi16(xy_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
++  xy_im_128 = _mm_madd_epi16(xy_im_128,input_y);//Im: (x_im*y_re + x_re*y_im)
++
++  // complex multiplication (w_re + jw_im)*(z_re + jz_im) = (w_re*z_re - w_im*z_im) + j(w_im*z_re + w_re*z_im)
++  // the real part
++  wz_re_128 = _mm_sign_epi16(input_w,*(__m128i*)&nr_conjug2[0]);
++  wz_re_128 = _mm_madd_epi16(wz_re_128,input_z); //Re: (w_re*z_re - w_im*z_im)
++
++  // the imag part
++  wz_im_128 = _mm_shufflelo_epi16(input_w,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
++  wz_im_128 = _mm_shufflehi_epi16(wz_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
++  wz_im_128 = _mm_madd_epi16(wz_im_128,input_z);//Im: (w_im*z_re + w_re*z_im)
++
++
++  xy_re_128 = _mm_sub_epi32(xy_re_128, wz_re_128);
++  xy_im_128 = _mm_sub_epi32(xy_im_128, wz_im_128);
++  //print_ints("rx_re:",(int32_t*)&xy_re_128[0]);
++  //print_ints("rx_Img:",(int32_t*)&xy_im_128[0]);
++  //divide by matrix det and convert back to Q15 before packing
++  int sum_det =0;
++  for (int k=0; k<4;k++) {
++    sum_det += ((((int *)&det[0])[k])>>2);
++    //printf("det_%d = %d log2 =%d \n",k,(((int *)&det[0])[k]),log2_approx(((int *)&det[0])[k]));
++    }
++
++  xy_re_128 = _mm_slli_epi32(xy_re_128,5);
++  xy_re_128 = _mm_srai_epi32(xy_re_128,log2_approx(sum_det));
++  xy_re_128 = _mm_slli_epi32(xy_re_128,5);
++
++  xy_im_128 = _mm_slli_epi32(xy_im_128,5);
++  xy_im_128 = _mm_srai_epi32(xy_im_128,log2_approx(sum_det));
++  xy_im_128 = _mm_slli_epi32(xy_im_128,5);
++
++  tmp_z0  = _mm_unpacklo_epi32(xy_re_128,xy_im_128);
++  //print_ints("unpack lo:",&tmp_z0[0]);
++  tmp_z1  = _mm_unpackhi_epi32(xy_re_128,xy_im_128);
++  //print_ints("unpack hi:",&tmp_z1[0]);
++  output = _mm_packs_epi32(tmp_z0,tmp_z1);
++
++  _mm_empty();
++  _m_empty();
++  return(output);
++}
++/* Zero Forcing Rx function: nr_construct_HhH_elements()
++ *
++ *
++ * */
++void nr_ulsch_construct_HhH_elements(int *conjch00_ch00,
++                               int *conjch01_ch01,
++                               int *conjch11_ch11,
++                               int *conjch10_ch10,//
++                               int *conjch20_ch20,
++                               int *conjch21_ch21,
++                               int *conjch30_ch30,
++                               int *conjch31_ch31,
++                               int *conjch00_ch01,//00_01
++                               int *conjch01_ch00,//01_00
++                               int *conjch10_ch11,//10_11
++                               int *conjch11_ch10,//11_10
++                               int *conjch20_ch21,
++                               int *conjch21_ch20,
++                               int *conjch30_ch31,
++                               int *conjch31_ch30,
++                               int32_t *after_mf_00,
++                               int32_t *after_mf_01,
++                               int32_t *after_mf_10,
++                               int32_t *after_mf_11,
++                               unsigned short nb_rb,
++                               unsigned char symbol)
++{
++  //This function is used to construct the (H_hermitian * H matrix) matrix elements
++  unsigned short rb;
++  __m128i *conjch00_ch00_128, *conjch01_ch01_128, *conjch11_ch11_128, *conjch10_ch10_128;
++  __m128i *conjch20_ch20_128, *conjch21_ch21_128, *conjch30_ch30_128, *conjch31_ch31_128;
++  __m128i *conjch00_ch01_128, *conjch01_ch00_128, *conjch10_ch11_128, *conjch11_ch10_128;
++  __m128i *conjch20_ch21_128, *conjch21_ch20_128, *conjch30_ch31_128, *conjch31_ch30_128;
++  __m128i *after_mf_00_128, *after_mf_01_128, *after_mf_10_128, *after_mf_11_128;
++
++  conjch00_ch00_128 = (__m128i *)conjch00_ch00;
++  conjch01_ch01_128 = (__m128i *)conjch01_ch01;
++  conjch11_ch11_128 = (__m128i *)conjch11_ch11;
++  conjch10_ch10_128 = (__m128i *)conjch10_ch10;
++
++  conjch20_ch20_128 = (__m128i *)conjch20_ch20;
++  conjch21_ch21_128 = (__m128i *)conjch21_ch21;
++  conjch30_ch30_128 = (__m128i *)conjch30_ch30;
++  conjch31_ch31_128 = (__m128i *)conjch31_ch31;
++
++  conjch00_ch01_128 = (__m128i *)conjch00_ch01;
++  conjch01_ch00_128 = (__m128i *)conjch01_ch00;
++  conjch10_ch11_128 = (__m128i *)conjch10_ch11;
++  conjch11_ch10_128 = (__m128i *)conjch11_ch10;
++
++  conjch20_ch21_128 = (__m128i *)conjch20_ch21;
++  conjch21_ch20_128 = (__m128i *)conjch21_ch20;
++  conjch30_ch31_128 = (__m128i *)conjch30_ch31;
++  conjch31_ch30_128 = (__m128i *)conjch31_ch30;
++
++  after_mf_00_128 = (__m128i *)after_mf_00;
++  after_mf_01_128 = (__m128i *)after_mf_01;
++  after_mf_10_128 = (__m128i *)after_mf_10;
++  after_mf_11_128 = (__m128i *)after_mf_11;
++
++  for (rb=0; rb<3*nb_rb; rb++) {
++
++    after_mf_00_128[0] =_mm_adds_epi16(conjch00_ch00_128[0],conjch10_ch10_128[0]);//00_00 + 10_10
++    if (conjch20_ch20 != NULL) after_mf_00_128[0] =_mm_adds_epi16(after_mf_00_128[0],conjch20_ch20_128[0]);
++    if (conjch30_ch30 != NULL) after_mf_00_128[0] =_mm_adds_epi16(after_mf_00_128[0],conjch30_ch30_128[0]);
++
++    after_mf_11_128[0] =_mm_adds_epi16(conjch01_ch01_128[0], conjch11_ch11_128[0]); //01_01 + 11_11
++    if (conjch21_ch21 != NULL) after_mf_11_128[0] =_mm_adds_epi16(after_mf_11_128[0],conjch21_ch21_128[0]);
++    if (conjch31_ch31 != NULL) after_mf_11_128[0] =_mm_adds_epi16(after_mf_11_128[0],conjch31_ch31_128[0]);
++
++    after_mf_01_128[0] =_mm_adds_epi16(conjch00_ch01_128[0], conjch10_ch11_128[0]);//00_01 + 10_11
++    if (conjch20_ch21 != NULL) after_mf_01_128[0] =_mm_adds_epi16(after_mf_01_128[0],conjch20_ch21_128[0]);
++    if (conjch30_ch31 != NULL) after_mf_01_128[0] =_mm_adds_epi16(after_mf_01_128[0],conjch30_ch31_128[0]);
++
++    after_mf_10_128[0] =_mm_adds_epi16(conjch01_ch00_128[0], conjch11_ch10_128[0]);//01_00 + 11_10
++    if (conjch21_ch20 != NULL) after_mf_10_128[0] =_mm_adds_epi16(after_mf_10_128[0],conjch21_ch20_128[0]);
++    if (conjch31_ch30 != NULL) after_mf_10_128[0] =_mm_adds_epi16(after_mf_10_128[0],conjch31_ch30_128[0]);
++
++#ifdef DEBUG_DLSCH_DEMOD
++    if ((rb<=30))
++    {
++      printf(" \n construct_HhH_elements \n");
++      print_shorts("after_mf_00_128:",(int16_t*)&after_mf_00_128[0]);
++      print_shorts("after_mf_01_128:",(int16_t*)&after_mf_01_128[0]);
++      print_shorts("after_mf_10_128:",(int16_t*)&after_mf_10_128[0]);
++      print_shorts("after_mf_11_128:",(int16_t*)&after_mf_11_128[0]);
++    }
++#endif
++    conjch00_ch00_128+=1;
++    conjch10_ch10_128+=1;
++    conjch01_ch01_128+=1;
++    conjch11_ch11_128+=1;
++
++    if (conjch20_ch20 != NULL) conjch20_ch20_128+=1;
++    if (conjch21_ch21 != NULL) conjch21_ch21_128+=1;
++    if (conjch30_ch30 != NULL) conjch30_ch30_128+=1;
++    if (conjch31_ch31 != NULL) conjch31_ch31_128+=1;
++
++    conjch00_ch01_128+=1;
++    conjch01_ch00_128+=1;
++    conjch10_ch11_128+=1;
++    conjch11_ch10_128+=1;
++
++    if (conjch20_ch21 != NULL) conjch20_ch21_128+=1;
++    if (conjch21_ch20 != NULL) conjch21_ch20_128+=1;
++    if (conjch30_ch31 != NULL) conjch30_ch31_128+=1;
++    if (conjch31_ch30 != NULL) conjch31_ch30_128+=1;
++
++    after_mf_00_128 += 1;
++    after_mf_01_128 += 1;
++    after_mf_10_128 += 1;
++    after_mf_11_128 += 1;
++  }
++  _mm_empty();
++  _m_empty();
++}
++
++/* Zero Forcing Rx function: nr_ulsch_zero_forcing_rx_2layers()
++ *
++ *
++ * */
++uint8_t nr_ulsch_zero_forcing_rx_2layers(int **rxdataF_comp,
++                                   int **ul_ch_mag,
++                                   int **ul_ch_magb,                                   
++                                   int **ul_ch_estimates_ext,
++                                   unsigned short nb_rb,
++                                   unsigned char n_rx,
++                                   unsigned char mod_order,
++                                   int shift,
++                                   unsigned char symbol,
++                                   int length)
++{
++  int *ch00, *ch01, *ch10, *ch11;
++  int *ch20, *ch30, *ch21, *ch31;
++  uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
++
++  #ifdef __AVX2__
++  int off = ((nb_rb&1) == 1)? 4:0;
++  #else
++  int off = 0;
++  #endif
++
++  /* we need at least alignment to 16 bytes, let's put 32 to be sure
++   * (maybe not necessary but doesn't hurt)
++   */
++  int32_t conjch00_ch01[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch01_ch00[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch10_ch11[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch11_ch10[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch00_ch00[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch01_ch01[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch10_ch10[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch11_ch11[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch20_ch20[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch21_ch21[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch30_ch30[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch31_ch31[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch20_ch21[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch30_ch31[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch21_ch20[12*nb_rb] __attribute__((aligned(32)));
++  int32_t conjch31_ch30[12*nb_rb] __attribute__((aligned(32)));
++
++  int32_t af_mf_00[12*nb_rb] __attribute__((aligned(32)));
++  int32_t af_mf_01[12*nb_rb] __attribute__((aligned(32)));
++  int32_t af_mf_10[12*nb_rb] __attribute__((aligned(32)));
++  int32_t af_mf_11[12*nb_rb] __attribute__((aligned(32)));
++  int32_t determ_fin[12*nb_rb] __attribute__((aligned(32)));
++
++  switch (n_rx) {
++    case 2://
++      ch00 = (int *)&ul_ch_estimates_ext[0][symbol*(off+nb_rb*12)];
++      ch01 = (int *)&ul_ch_estimates_ext[2][symbol*(off+nb_rb*12)];
++      ch10 = (int *)&ul_ch_estimates_ext[1][symbol*(off+nb_rb*12)];
++      ch11 = (int *)&ul_ch_estimates_ext[3][symbol*(off+nb_rb*12)];
++      ch20 = NULL;
++      ch21 = NULL;
++      ch30 = NULL;
++      ch31 = NULL;
++      break;
++
++    case 4://
++      ch00 = (int *)&ul_ch_estimates_ext[0][symbol*(off+nb_rb*12)];
++      ch01 = (int *)&ul_ch_estimates_ext[4][symbol*(off+nb_rb*12)];
++      ch10 = (int *)&ul_ch_estimates_ext[1][symbol*(off+nb_rb*12)];
++      ch11 = (int *)&ul_ch_estimates_ext[5][symbol*(off+nb_rb*12)];
++      ch20 = (int *)&ul_ch_estimates_ext[2][symbol*(off+nb_rb*12)];
++      ch21 = (int *)&ul_ch_estimates_ext[6][symbol*(off+nb_rb*12)];
++      ch30 = (int *)&ul_ch_estimates_ext[3][symbol*(off+nb_rb*12)];
++      ch31 = (int *)&ul_ch_estimates_ext[7][symbol*(off+nb_rb*12)];
++      break;
++
++    default:
++      return -1;
++      break;
++  }
++
++  /* 1- Compute the rx channel matrix after compensation: (1/2^log2_max)x(H_herm x H)
++   * for n_rx = 2
++   * |conj_H_00       conj_H_10|    | H_00         H_01|   |(conj_H_00xH_00+conj_H_10xH_10)   (conj_H_00xH_01+conj_H_10xH_11)|
++   * |                         |  x |                  | = |                                                                 |
++   * |conj_H_01       conj_H_11|    | H_10         H_11|   |(conj_H_01xH_00+conj_H_11xH_10)   (conj_H_01xH_01+conj_H_11xH_11)|
++   *
++   */
++
++  if (n_rx>=2){
++    // (1/2^log2_maxh)*conj_H_00xH_00: (1/(64*2))conjH_00*H_00*2^15
++    nr_ulsch_conjch0_mult_ch1(ch00,
++                        ch00,
++                        conjch00_ch00,
++                        nb_rb_0,
++                        shift);
++    // (1/2^log2_maxh)*conj_H_10xH_10: (1/(64*2))conjH_10*H_10*2^15
++    nr_ulsch_conjch0_mult_ch1(ch10,
++                        ch10,
++                        conjch10_ch10,
++                        nb_rb_0,
++                        shift);
++    // conj_H_00xH_01
++    nr_ulsch_conjch0_mult_ch1(ch00,
++                        ch01,
++                        conjch00_ch01,
++                        nb_rb_0,
++                        shift); // this shift is equal to the channel level log2_maxh
++    // conj_H_10xH_11
++    nr_ulsch_conjch0_mult_ch1(ch10,
++                        ch11,
++                        conjch10_ch11,
++                        nb_rb_0,
++                        shift);
++    // conj_H_01xH_01
++    nr_ulsch_conjch0_mult_ch1(ch01,
++                        ch01,
++                        conjch01_ch01,
++                        nb_rb_0,
++                        shift);
++    // conj_H_11xH_11
++    nr_ulsch_conjch0_mult_ch1(ch11,
++                        ch11,
++                        conjch11_ch11,
++                        nb_rb_0,
++                        shift);
++    // conj_H_01xH_00
++    nr_ulsch_conjch0_mult_ch1(ch01,
++                        ch00,
++                        conjch01_ch00,
++                        nb_rb_0,
++                        shift);
++    // conj_H_11xH_10
++    nr_ulsch_conjch0_mult_ch1(ch11,
++                        ch10,
++                        conjch11_ch10,
++                        nb_rb_0,
++                        shift);
++  }
++  if (n_rx==4){
++    // (1/2^log2_maxh)*conj_H_20xH_20: (1/(64*2*16))conjH_20*H_20*2^15
++    nr_ulsch_conjch0_mult_ch1(ch20,
++                        ch20,
++                        conjch20_ch20,
++                        nb_rb_0,
++                        shift);
++
++    // (1/2^log2_maxh)*conj_H_30xH_30: (1/(64*2*4))conjH_30*H_30*2^15
++    nr_ulsch_conjch0_mult_ch1(ch30,
++                        ch30,
++                        conjch30_ch30,
++                        nb_rb_0,
++                        shift);
++
++    // (1/2^log2_maxh)*conj_H_20xH_20: (1/(64*2))conjH_20*H_20*2^15
++    nr_ulsch_conjch0_mult_ch1(ch20,
++                        ch21,
++                        conjch20_ch21,
++                        nb_rb_0,
++                        shift);
++
++    nr_ulsch_conjch0_mult_ch1(ch30,
++                        ch31,
++                        conjch30_ch31,
++                        nb_rb_0,
++                        shift);
++
++    nr_ulsch_conjch0_mult_ch1(ch21,
++                        ch21,
++                        conjch21_ch21,
++                        nb_rb_0,
++                        shift);
++
++    nr_ulsch_conjch0_mult_ch1(ch31,
++                        ch31,
++                        conjch31_ch31,
++                        nb_rb_0,
++                        shift);
++
++    // (1/2^log2_maxh)*conj_H_20xH_20: (1/(64*2))conjH_20*H_20*2^15
++    nr_ulsch_conjch0_mult_ch1(ch21,
++                        ch20,
++                        conjch21_ch20,
++                        nb_rb_0,
++                        shift);
++
++    nr_ulsch_conjch0_mult_ch1(ch31,
++                        ch30,
++                        conjch31_ch30,
++                        nb_rb_0,
++                        shift);
++
++    nr_ulsch_construct_HhH_elements(conjch00_ch00,
++                              conjch01_ch01,
++                              conjch11_ch11,
++                              conjch10_ch10,//
++                              conjch20_ch20,
++                              conjch21_ch21,
++                              conjch30_ch30,
++                              conjch31_ch31,
++                              conjch00_ch01,
++                              conjch01_ch00,
++                              conjch10_ch11,
++                              conjch11_ch10,//
++                              conjch20_ch21,
++                              conjch21_ch20,
++                              conjch30_ch31,
++                              conjch31_ch30,
++                              af_mf_00,
++                              af_mf_01,
++                              af_mf_10,
++                              af_mf_11,
++                              nb_rb_0,
++                              symbol);
++  }
++  if (n_rx==2){
++    nr_ulsch_construct_HhH_elements(conjch00_ch00,
++                              conjch01_ch01,
++                              conjch11_ch11,
++                              conjch10_ch10,//
++                              NULL,
++                              NULL,
++                              NULL,
++                              NULL,
++                              conjch00_ch01,
++                              conjch01_ch00,
++                              conjch10_ch11,
++                              conjch11_ch10,//
++                              NULL,
++                              NULL,
++                              NULL,
++                              NULL,
++                              af_mf_00,
++                              af_mf_01,
++                              af_mf_10,
++                              af_mf_11,
++                              nb_rb_0,
++                              symbol);
++  }
++  //det_HhH = ad -bc
++  nr_ulsch_det_HhH(af_mf_00,//a
++             af_mf_01,//b
++             af_mf_10,//c
++             af_mf_11,//d
++             determ_fin,
++             nb_rb_0,
++             symbol,
++             shift);
++  /* 2- Compute the channel matrix inversion **********************************
++   *
++     *    |(conj_H_00xH_00+conj_H_10xH_10)   (conj_H_00xH_01+conj_H_10xH_11)|
++     * A= |                                                                 |
++     *    |(conj_H_01xH_00+conj_H_11xH_10)   (conj_H_01xH_01+conj_H_11xH_11)|
++     *
++     *
++     *
++     *inv(A) =(1/det)*[d  -b
++     *                 -c  a]
++     *
++     *
++     **************************************************************************/
++  __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*ul_ch_mag128_0=NULL,*ul_ch_mag128b_0=NULL,*determ_fin_128;//*dl_ch_mag128_1,*dl_ch_mag128b_1,*dl_ch_mag128r_1
++  __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3;
++  __m128i *after_mf_a_128,*after_mf_b_128, *after_mf_c_128, *after_mf_d_128;
++  __m128i QAM_amp128={0},QAM_amp128b={0};
++
++  determ_fin_128      = (__m128i *)&determ_fin[0];
++
++  rxdataF_comp128_0   = (__m128i *)&rxdataF_comp[0][symbol*(off+nb_rb*12)];//aatx=0 @ aarx =0
++  rxdataF_comp128_1   = (__m128i *)&rxdataF_comp[n_rx][symbol*(off+nb_rb*12)];//aatx=1 @ aarx =0
++
++  after_mf_a_128 = (__m128i *)af_mf_00;
++  after_mf_b_128 = (__m128i *)af_mf_01;
++  after_mf_c_128 = (__m128i *)af_mf_10;
++  after_mf_d_128 = (__m128i *)af_mf_11;
++
++  if (mod_order>2) {
++    if (mod_order == 4) {
++      QAM_amp128 = _mm_set1_epi16(QAM16_n1);  //2/sqrt(10)
++      QAM_amp128b = _mm_setzero_si128();
++    } else if (mod_order == 6) {
++      QAM_amp128  = _mm_set1_epi16(QAM64_n1); //4/sqrt{42}
++      QAM_amp128b = _mm_set1_epi16(QAM64_n2); //2/sqrt{42}
++    } 
++    ul_ch_mag128_0      = (__m128i *)&ul_ch_mag[0][symbol*(off+nb_rb*12)];
++    ul_ch_mag128b_0     = (__m128i *)&ul_ch_magb[0][symbol*(off+nb_rb*12)];
++  }
++
++  for (int rb=0; rb<3*nb_rb_0; rb++) {
++    if (mod_order>2) {
++      int sum_det =0;
++      for (int k=0; k<4;k++) {
++        sum_det += ((((int *)&determ_fin_128[0])[k])>>2);
++        //printf("det_%d = %d\n",k,sum_det);
++        }
++
++      mmtmpD2 = _mm_slli_epi32(determ_fin_128[0],5);
++      mmtmpD2 = _mm_srai_epi32(mmtmpD2,log2_approx(sum_det));
++      mmtmpD2 = _mm_slli_epi32(mmtmpD2,5);
++
++      mmtmpD3 = _mm_unpacklo_epi32(mmtmpD2,mmtmpD2);
++
++      mmtmpD2 = _mm_unpackhi_epi32(mmtmpD2,mmtmpD2);
++
++      mmtmpD2 = _mm_packs_epi32(mmtmpD3,mmtmpD2);
++
++      ul_ch_mag128_0[0] = mmtmpD2;
++      ul_ch_mag128b_0[0] = mmtmpD2;
++
++      ul_ch_mag128_0[0] = _mm_mulhi_epi16(ul_ch_mag128_0[0],QAM_amp128);
++      ul_ch_mag128_0[0] = _mm_slli_epi16(ul_ch_mag128_0[0],1);
++
++      ul_ch_mag128b_0[0] = _mm_mulhi_epi16(ul_ch_mag128b_0[0],QAM_amp128b);
++      ul_ch_mag128b_0[0] = _mm_slli_epi16(ul_ch_mag128b_0[0],1);
++
++      //print_shorts("mag layer 1:",(int16_t*)&dl_ch_mag128_0[0]);
++      //print_shorts("mag layer 2:",(int16_t*)&dl_ch_mag128_1[0]);
++      //print_shorts("magb layer 1:",(int16_t*)&dl_ch_mag128b_0[0]);
++      //print_shorts("magb layer 2:",(int16_t*)&dl_ch_mag128b_1[0]);
++      //print_shorts("magr layer 1:",(int16_t*)&dl_ch_mag128r_0[0]);
++      //print_shorts("magr layer 2:",(int16_t*)&dl_ch_mag128r_1[0]);
++    }
++    // multiply by channel Inv
++    //rxdataF_zf128_0 = rxdataF_comp128_0*d - b*rxdataF_comp128_1
++    //rxdataF_zf128_1 = rxdataF_comp128_1*a - c*rxdataF_comp128_0
++    //printf("layer_1 \n");
++    mmtmpD0 = nr_ulsch_comp_muli_sum(rxdataF_comp128_0[0],
++                               after_mf_d_128[0],
++                               rxdataF_comp128_1[0],
++                               after_mf_b_128[0],
++                               determ_fin_128[0]);
++
++    //printf("layer_2 \n");
++    mmtmpD1 = nr_ulsch_comp_muli_sum(rxdataF_comp128_1[0],
++                               after_mf_a_128[0],
++                               rxdataF_comp128_0[0],
++                               after_mf_c_128[0],
++                               determ_fin_128[0]);
++
++    rxdataF_comp128_0[0] = mmtmpD0;
++    rxdataF_comp128_1[0] = mmtmpD1;
++#ifdef DEBUG_DLSCH_DEMOD
++    printf("\n Rx signal after ZF l%d rb%d\n",symbol,rb);
++    print_shorts(" Rx layer 1:",(int16_t*)&rxdataF_comp128_0[0]);
++    print_shorts(" Rx layer 2:",(int16_t*)&rxdataF_comp128_1[0]);
++#endif
++    determ_fin_128 += 1;
++    ul_ch_mag128_0 += 1;
++    ul_ch_mag128b_0 += 1;    
++    rxdataF_comp128_0 += 1;
++    rxdataF_comp128_1 += 1;
++    after_mf_a_128 += 1;
++    after_mf_b_128 += 1;
++    after_mf_c_128 += 1;
++    after_mf_d_128 += 1;
++  }
++  _mm_empty();
++  _m_empty();
++   return(0);
++}
++
++//==============================================================================================
++
++/* Main Function */
+ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+                 uint8_t ulsch_id,
+                 uint32_t frame,
+@@ -1184,21 +1969,29 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+   gNB->pusch_vars[ulsch_id]->cl_done = 0;
+ 
+   bwp_start_subcarrier = ((rel15_ul->rb_start + rel15_ul->bwp_start)*NR_NB_SC_PER_RB + frame_parms->first_carrier_offset) % frame_parms->ofdm_symbol_size;
+-  LOG_D(PHY,"pusch %d.%d : bwp_start_subcarrier %d, rb_start %d, first_carrier_offset %d\n", frame,slot,bwp_start_subcarrier, rel15_ul->rb_start, frame_parms->first_carrier_offset);
+-  LOG_D(PHY,"pusch %d.%d : ul_dmrs_symb_pos %x\n",frame,slot,rel15_ul->ul_dmrs_symb_pos);
+-
++  LOG_I(PHY,"pusch %d.%d : bwp_start_subcarrier %d, rb_start %d, first_carrier_offset %d\n", frame,slot,bwp_start_subcarrier, rel15_ul->rb_start, frame_parms->first_carrier_offset);
++  LOG_I(PHY,"pusch %d.%d : ul_dmrs_symb_pos %x\n",frame,slot,rel15_ul->ul_dmrs_symb_pos);
++  LOG_I(PHY,"ulsch RX %x : start_rb %d nb_rb %d mcs %d Nl %d Tpmi %d bwp_start %d start_sc %d start_symbol %d num_symbols %d cdmgrpsnodata %d num_dmrs %d dmrs_ports %d\n",
++          rel15_ul->rnti,rel15_ul->rb_start,rel15_ul->rb_size,rel15_ul->mcs_index,
++          rel15_ul->nrOfLayers,0,rel15_ul->bwp_start,0,rel15_ul->start_symbol_index,rel15_ul->nr_of_symbols,
++          rel15_ul->num_dmrs_cdm_grps_no_data,rel15_ul->ul_dmrs_symb_pos,rel15_ul->dmrs_ports);
+   //----------------------------------------------------------
+   //--------------------- Channel estimation ---------------------
+   //----------------------------------------------------------
+   start_meas(&gNB->ulsch_channel_estimation_stats);
+-  for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) {
++  for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) 
++  {
+     uint8_t dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
+     LOG_D(PHY, "symbol %d, dmrs_symbol_flag :%d\n", symbol, dmrs_symbol_flag);
+-    if (dmrs_symbol_flag == 1) {
++    
++    if (dmrs_symbol_flag == 1) 
++    {
+       if (gNB->pusch_vars[ulsch_id]->dmrs_symbol == INVALID_VALUE)
+         gNB->pusch_vars[ulsch_id]->dmrs_symbol = symbol;
+ 
+       for (int nl=0; nl<rel15_ul->nrOfLayers; nl++)
++      {
++        
+         nr_pusch_channel_estimation(gNB,
+                                     slot,
+                                     get_dmrs_port(nl,rel15_ul->dmrs_ports),
+@@ -1206,6 +1999,7 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+                                     ulsch_id,
+                                     bwp_start_subcarrier,
+                                     rel15_ul);
++      }
+ 
+       nr_gnb_measurements(gNB, ulsch_id, harq_pid, symbol,rel15_ul->nrOfLayers);
+ 
+@@ -1214,9 +2008,12 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+           gNB->pusch_vars[ulsch_id]->ulsch_power[aarx] = 0;
+           gNB->pusch_vars[ulsch_id]->ulsch_noise_power[aarx] = 0;
+         }
+-        gNB->pusch_vars[ulsch_id]->ulsch_power[aarx] += signal_energy_nodc(
+-            &gNB->pusch_vars[ulsch_id]->ul_ch_estimates[aarx][symbol * frame_parms->ofdm_symbol_size],
++        for (aatx = 0; aatx < rel15_ul->nrOfLayers; aatx++)
++        { 
++          gNB->pusch_vars[ulsch_id]->ulsch_power[aarx] += signal_energy_nodc(
++            &gNB->pusch_vars[ulsch_id]->ul_ch_estimates[aatx*gNB->frame_parms.nb_antennas_rx+aarx][symbol * frame_parms->ofdm_symbol_size],
+             rel15_ul->rb_size * 12);
++        }
+         for (int rb = 0; rb < rel15_ul->rb_size; rb++) {
+           gNB->pusch_vars[ulsch_id]->ulsch_noise_power[aarx] +=
+               gNB->measurements.n0_subband_power[aarx][rel15_ul->bwp_start + rel15_ul->rb_start + rb] /
+@@ -1234,23 +2031,29 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+ #endif
+   uint32_t rxdataF_ext_offset = 0;
+ 
+-  for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) {
++  for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) 
++  {
+     uint8_t dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
+-    if (dmrs_symbol_flag == 1) {
++    if (dmrs_symbol_flag == 1) 
++    {
+       if ((rel15_ul->ul_dmrs_symb_pos >> ((symbol + 1) % frame_parms->symbols_per_slot)) & 0x01)
+         AssertFatal(1==0,"Double DMRS configuration is not yet supported\n");
+ 
+       gNB->pusch_vars[ulsch_id]->dmrs_symbol = symbol;
+ 
+-      if (rel15_ul->dmrs_config_type == 0) {
++      if (rel15_ul->dmrs_config_type == 0) 
++      {
+         // if no data in dmrs cdm group is 1 only even REs have no data
+         // if no data in dmrs cdm group is 2 both odd and even REs have no data
+         nb_re_pusch = rel15_ul->rb_size *(12 - (rel15_ul->num_dmrs_cdm_grps_no_data*6));
+       }
+-      else {
++      else 
++      {
+         nb_re_pusch = rel15_ul->rb_size *(12 - (rel15_ul->num_dmrs_cdm_grps_no_data*4));
+       }
+-    } else {
++    } 
++    else 
++    {
+       nb_re_pusch = rel15_ul->rb_size * NR_NB_SC_PER_RB;
+     }
+ 
+@@ -1260,16 +2063,16 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+     //----------------------------------------------------------
+     //--------------------- RBs extraction ---------------------
+     //----------------------------------------------------------
+-    if (nb_re_pusch > 0) {
+-
++    if (nb_re_pusch > 0) 
++    {
+       start_meas(&gNB->ulsch_rbs_extraction_stats);
+-      nr_ulsch_extract_rbs_single(gNB->common_vars.rxdataF,
+-                                  gNB->pusch_vars[ulsch_id],
+-                                  slot,
+-                                  symbol,
+-                                  dmrs_symbol_flag,
+-                                  rel15_ul,
+-                                  frame_parms);
++      nr_ulsch_extract_rbs(gNB->common_vars.rxdataF,
++                           gNB->pusch_vars[ulsch_id],
++                           slot,
++                           symbol,
++                           dmrs_symbol_flag,
++                           rel15_ul,
++                           frame_parms);
+       stop_meas(&gNB->ulsch_rbs_extraction_stats);
+ 
+       //----------------------------------------------------------
+@@ -1280,8 +2083,9 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+                             gNB->ulsch[ulsch_id],
+                             symbol,
+                             dmrs_symbol_flag,
+-                            rel15_ul->rb_size,
+-                            rel15_ul->dmrs_config_type);
++                            nb_re_pusch,
++                            rel15_ul->nrOfLayers,
++                            rel15_ul->rb_size);
+ 
+       if (gNB->pusch_vars[ulsch_id]->cl_done==0) {
+         nr_ulsch_channel_level(gNB->pusch_vars[ulsch_id]->ul_ch_estimates_ext,
+@@ -1312,9 +2116,14 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+                                     gNB->pusch_vars[ulsch_id]->ul_ch_mag0,
+                                     gNB->pusch_vars[ulsch_id]->ul_ch_magb0,
+                                     gNB->pusch_vars[ulsch_id]->rxdataF_comp,
++                                    #ifdef SUPPORT_PMI_MATRIC
+                                     (rel15_ul->nrOfLayers>1) ? gNB->pusch_vars[ulsch_id]->rho : NULL,
++                                    #else
++                                    NULL,
++                                    #endif
+                                     frame_parms,
+                                     symbol,
++                                    nb_re_pusch,
+                                     dmrs_symbol_flag,
+                                     rel15_ul->qam_mod_order,
+                                     rel15_ul->nrOfLayers,
+@@ -1327,19 +2136,37 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+                              gNB->pusch_vars[ulsch_id]->rxdataF_comp,
+                              gNB->pusch_vars[ulsch_id]->ul_ch_mag0,
+                              gNB->pusch_vars[ulsch_id]->ul_ch_magb0,
++                             #ifdef SUPPORT_PMI_MATRIC
++                             (rel15_ul->nrOfLayers>1) ? gNB->pusch_vars[ulsch_id]->rho : NULL,
++                             #else
++                             NULL,
++                             #endif
++                             rel15_ul->nrOfLayers,
+                              symbol,
+-                             rel15_ul->rb_size);
++                             rel15_ul->rb_size,
++                             nb_re_pusch);
++                 
++      if (rel15_ul->nrOfLayers == 2)//Apply zero forcing for 2 Tx layers
++        nr_ulsch_zero_forcing_rx_2layers(gNB->pusch_vars[ulsch_id]->rxdataF_comp,
++                                   gNB->pusch_vars[ulsch_id]->ul_ch_mag0,
++                                   gNB->pusch_vars[ulsch_id]->ul_ch_magb0,                                   
++                                   gNB->pusch_vars[ulsch_id]->ul_ch_estimates_ext,
++                                   rel15_ul->rb_size,
++                                   frame_parms->nb_antennas_rx,
++                                   rel15_ul->qam_mod_order,
++                                   gNB->pusch_vars[ulsch_id]->log2_maxh,
++                                   symbol,
++                                   nb_re_pusch);
+       stop_meas(&gNB->ulsch_mrc_stats);
+ 
+-      // transform precoding = 0 means enabled
+-      if (rel15_ul->transform_precoding == 0) {
+-
+-      #ifdef __AVX2__
++      if (rel15_ul->transformPrecoder == transformPrecoder_enabled)      
++      {
++         #ifdef __AVX2__
+         // For odd number of resource blocks need byte alignment to multiple of 8
+         int nb_re_pusch2 = nb_re_pusch + (nb_re_pusch&7);
+-      #else
++        #else
+         int nb_re_pusch2 = nb_re_pusch;
+-      #endif
++        #endif
+ 
+         // perform IDFT operation on the compensated rxdata if transform precoding is enabled
+         nr_idft(&gNB->pusch_vars[ulsch_id]->rxdataF_comp[0][symbol * nb_re_pusch2], nb_re_pusch);
+@@ -1351,7 +2178,8 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+       //----------------------------------------------------------
+       /* In case PTRS is enabled then LLR will be calculated after PTRS symbols are processed *
+       * otherwise LLR are calculated for each symbol based upon DMRS channel estimates only. */
+-      if (rel15_ul->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
++      if (rel15_ul->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) 
++      {
+         start_meas(&gNB->ulsch_ptrs_processing_stats);
+         nr_pusch_ptrs_processing(gNB,
+                                  frame_parms,
+@@ -1370,14 +2198,17 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
+       /*--------------------  LLRs computation  -------------------------------------------------------------*/
+       /*-----------------------------------------------------------------------------------------------------*/
+       start_meas(&gNB->ulsch_llr_stats);
+-      nr_ulsch_compute_llr(&gNB->pusch_vars[ulsch_id]->rxdataF_comp[0][symbol * (off + rel15_ul->rb_size * NR_NB_SC_PER_RB)],
+-                           gNB->pusch_vars[ulsch_id]->ul_ch_mag0,
+-                           gNB->pusch_vars[ulsch_id]->ul_ch_magb0,
+-                           &gNB->pusch_vars[ulsch_id]->llr[rxdataF_ext_offset * rel15_ul->qam_mod_order],
+-                           rel15_ul->rb_size,
+-                           gNB->pusch_vars[ulsch_id]->ul_valid_re_per_slot[symbol],
+-                           symbol,
+-                           rel15_ul->qam_mod_order);
++      for (aatx=0; aatx < rel15_ul->nrOfLayers; aatx++)
++      {
++        nr_ulsch_compute_llr(&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aatx*frame_parms->nb_antennas_rx][symbol * (off + rel15_ul->rb_size * NR_NB_SC_PER_RB)],
++                             gNB->pusch_vars[ulsch_id]->ul_ch_mag0[aatx*frame_parms->nb_antennas_rx],
++                             gNB->pusch_vars[ulsch_id]->ul_ch_magb0[aatx*frame_parms->nb_antennas_rx],
++                             &gNB->pusch_vars[ulsch_id]->llr_layers[aatx][rxdataF_ext_offset * rel15_ul->qam_mod_order],
++                             rel15_ul->rb_size,
++                             gNB->pusch_vars[ulsch_id]->ul_valid_re_per_slot[symbol],
++                             symbol,
++                             rel15_ul->qam_mod_order);
++      }
+       stop_meas(&gNB->ulsch_llr_stats);
+       rxdataF_ext_offset += gNB->pusch_vars[ulsch_id]->ul_valid_re_per_slot[symbol];
+     }
+diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
+index bf96a76106..ad41ebf3d6 100644
+--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
++++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
+@@ -64,7 +64,7 @@ void nr_ulsch_qpsk_llr(int32_t *rxdataF_comp,
+ //----------------------------------------------------------------------------------------------
+ 
+ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
+-                        int32_t **ul_ch_mag,
++                        int32_t *ul_ch_mag,
+                         int16_t  *ulsch_llr,
+                         uint32_t nb_rb,
+                         uint32_t nb_re,
+@@ -110,12 +110,12 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
+ 
+ #if defined(__x86_64__) || defined(__i386__)
+ #ifdef __AVX2__
+-    ch_mag = (__m256i*)&ul_ch_mag[0][(symbol*(off+(nb_rb*12)))];
++    ch_mag = (__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
+ #else
+-    ch_mag = (__m128i*)&ul_ch_mag[0][(symbol*(off+(nb_rb*12)))];
++    ch_mag = (__m128i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
+ #endif
+ #elif defined(__arm__)
+-  ch_mag = (int16x8_t*)&ul_ch_mag[0][(symbol*nb_rb*12)];
++  ch_mag = (int16x8_t*)&ul_ch_mag[(symbol*nb_rb*12)];
+ #endif
+ 
+ #ifdef __AVX2__
+@@ -231,8 +231,8 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
+ //----------------------------------------------------------------------------------------------
+ 
+ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
+-                        int32_t **ul_ch_mag,
+-                        int32_t **ul_ch_magb,
++                        int32_t *ul_ch_mag,
++                        int32_t *ul_ch_magb,
+                         int16_t  *ulsch_llr,
+                         uint32_t nb_rb,
+                         uint32_t nb_re,
+@@ -265,15 +265,15 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
+ 
+ #if defined(__x86_64__) || defined(__i386__)
+ #ifdef __AVX2__
+-  ch_mag = (__m256i*)&ul_ch_mag[0][(symbol*(off+(nb_rb*12)))];
+-  ch_magb = (__m256i*)&ul_ch_magb[0][(symbol*(off+(nb_rb*12)))];
++  ch_mag = (__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
++  ch_magb = (__m256i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
+ #else
+-  ch_mag = (__m128i*)&ul_ch_mag[0][(symbol*nb_rb*12)];
+-  ch_magb = (__m128i*)&ul_ch_magb[0][(symbol*nb_rb*12)];
++  ch_mag = (__m128i*)&ul_ch_mag[(symbol*nb_rb*12)];
++  ch_magb = (__m128i*)&ul_ch_magb[(symbol*nb_rb*12)];
+ #endif
+ #elif defined(__arm__)
+-  ch_mag = (int16x8_t*)&ul_ch_mag[0][(symbol*nb_rb*12)];
+-  ch_magb = (int16x8_t*)&ul_ch_magb[0][(symbol*nb_rb*12)];
++  ch_mag = (int16x8_t*)&ul_ch_mag[(symbol*nb_rb*12)];
++  ch_magb = (int16x8_t*)&ul_ch_magb[(symbol*nb_rb*12)];
+ #endif
+ 
+ #ifdef __AVX2__
+@@ -471,8 +471,8 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
+ 
+ 
+ void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
+-                          int32_t **ul_ch_mag,
+-                          int32_t **ul_ch_magb,
++                          int32_t *ul_ch_mag,
++                          int32_t *ul_ch_magb,
+                           int16_t *ulsch_llr,
+                           uint32_t nb_rb,
+                           uint32_t nb_re,
+diff --git a/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c b/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
+index 65ae288252..2cb7609d62 100644
+--- a/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
++++ b/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
+@@ -148,7 +148,7 @@ short filt8_dcr0_h[8]= {
+ 0,4096,8192,12288,16384,0,0,0};
+ 
+ short filt8_l1[8] = {
+-24576,16384,0,0,0,0,0,0};
++24576,16384,8192,0,0,0,0,0};
+ 
+ short filt8_ml1[8] = {
+ -8192,0,8192,16384,8192,0,0,0};
+@@ -163,10 +163,10 @@ short filt8_mm1[8]= {
+ 0,0,0,0,8192,16384,8192,0};
+ 
+ short filt8_dcl1[8]= {
+-0,0,0,16384,12288,8192,4096,0};
++0,0,16384,12288,8192,4096,0,0};
+ 
+ short filt8_dcr1[8]= {
+-0,0,0,0,4096,8192,12288,16384};
++0,0,0,4096,8192,12288,16384,0};
+ 
+ short filt8_dcl1_h[8]= {
+ 0,16384,12288,8192,4096,0,0,0};
+diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c
+index 945dc51673..a293eec6d8 100644
+--- a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c
++++ b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c
+@@ -258,6 +258,15 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
+   Ilbrm = 0;
+   Tbslbrm = 950984; //max tbs
+   Coderate = 0.0;
++  
++  #if 0
++  harq_process->a[0] = 0x31;
++  for (int i = 1; i < harq_process->pusch_pdu.pusch_data.tb_size; i++) 
++  {
++    harq_process->a[i] = (i&0xff);
++  }
++  #endif
++  
+   trace_NRpdu(DIRECTION_UPLINK, harq_process->a, harq_process->pusch_pdu.pusch_data.tb_size, 0, WS_C_RNTI, 0, 0, 0,0, 0);
+ ///////////
+ /////////////////////////////////////////////////////////////////////////////////////////  
+diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
+index 99dc2b658e..5d76e654ba 100644
+--- a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
++++ b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
+@@ -98,13 +98,12 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
+                                uint8_t thread_id,
+                                int gNB_id) {
+ 
+-  LOG_D(PHY,"nr_ue_ulsch_procedures hard_id %d %d.%d\n",harq_pid,frame,slot);
++  LOG_I(PHY,"nr_ue_ulsch_procedures hard_id %d %d.%d\n",harq_pid,frame,slot);
+ 
+   uint32_t available_bits;
+   uint8_t cwd_index, l;
+   uint32_t scrambled_output[NR_MAX_NB_CODEWORDS][NR_MAX_PDSCH_ENCODED_LENGTH>>5];
+-  int16_t **tx_layers;
+-  int32_t **txdataF;
++  	  
+   int8_t Wf[2], Wt[2], l_prime[2], delta;
+   uint8_t nb_dmrs_re_per_rb;
+   int ap, i;
+@@ -112,6 +111,9 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
+ 
+   NR_DL_FRAME_PARMS *frame_parms = &UE->frame_parms;
+   NR_UE_PUSCH *pusch_ue = UE->pusch_vars[thread_id][gNB_id];
++  int16_t **tx_layers = (int16_t **)pusch_ue->txdataF_layers;
++  int16_t **tx_precoding = (int16_t **)pusch_ue->txdataF_precoding;
++  int32_t **txdataF = UE->common_vars.txdataF;
+ 
+   uint8_t  num_of_codewords = 1; // tmp assumption
+   int      Nid_cell = 0;
+@@ -148,9 +150,9 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
+ 
+     nb_dmrs_re_per_rb = ((dmrs_type == pusch_dmrs_type1) ? 6:4)*cdm_grps_no_data;
+ 
+-    LOG_D(PHY,"ulsch %x : start_rb %d bwp_start %d start_sc %d start_symbol %d num_symbols %d cdmgrpsnodata %d num_dmrs %d dmrs_re_per_rb %d\n",
+-          rnti,start_rb,pusch_pdu->bwp_start,start_sc,start_symbol,number_of_symbols,cdm_grps_no_data,number_dmrs_symbols,nb_dmrs_re_per_rb);
+-
++    LOG_I(PHY,"ulsch TX %x : start_rb %d nb_rb %d mod_order %d Nl %d Tpmi %d bwp_start %d start_sc %d start_symbol %d num_symbols %d cdmgrpsnodata %d num_dmrs %d dmrs_re_per_rb %d\n",
++          rnti,start_rb,nb_rb,mod_order,Nl,pusch_pdu->Tpmi,pusch_pdu->bwp_start,start_sc,start_symbol,number_of_symbols,cdm_grps_no_data,number_dmrs_symbols,nb_dmrs_re_per_rb);
++    
+     // TbD num_of_mod_symbols is set but never used
+     N_RE_prime = NR_NB_SC_PER_RB*number_of_symbols - nb_dmrs_re_per_rb*number_dmrs_symbols - N_PRB_oh;
+     harq_process_ul_ue->num_of_mod_symbols = N_RE_prime*nb_rb*num_of_codewords;
+@@ -236,8 +238,6 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
+   /////////////////////////ULSCH layer mapping/////////////////////////
+   ///////////
+ 
+-  tx_layers = (int16_t **)pusch_ue->txdataF_layers;
+-
+   nr_ue_layer_mapping(UE->ulsch[thread_id][gNB_id],
+                       Nl,
+                       available_bits/mod_order,
+@@ -257,7 +257,7 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
+   int16_t *dmrs_seq = NULL;
+ 
+   // if  transform precoding is enbaled (value 0)
+-  if (pusch_pdu->transform_precoding == 0) {
++  if (pusch_pdu->transformPrecoder == transformPrecoder_enabled) {
+ 
+     uint32_t nb_re_pusch=nb_rb * NR_NB_SC_PER_RB;
+     uint32_t y_offset = 0;
+@@ -315,10 +315,7 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
+       LOG_M("UE_DMRS_SEQ.m","UE_DMRS_SEQ", dmrs_seq,nb_re_pusch,1,1);
+     #endif
+ 
+-  }
+-  else
+-    memcpy(ulsch_ue->y, tx_layers[0], (available_bits/mod_order)*sizeof(int32_t));
+-  
++  }  
+ 
+   ///////////
+   ////////////////////////////////////////////////////////////////////////
+@@ -328,145 +325,233 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
+   /////////////////////////ULSCH RE mapping/////////////////////////
+   ///////////
+ 
+-  txdataF = UE->common_vars.txdataF;
+-
+-  for (ap=0; ap< Nl; ap++) {
+-
+-    uint8_t k_prime = 0;
+-    uint16_t m = 0;
+-
+-    
+-    #ifdef DEBUG_PUSCH_MAPPING
++    for (ap=0; ap< Nl; ap++) 
++    {      
++      uint8_t k_prime = 0;
++      uint16_t m = 0;
++      
++      #ifdef DEBUG_PUSCH_MAPPING
+       printf("NR_ULSCH_UE: Value of CELL ID %d /t, u %d \n", frame_parms->Nid_cell, u);
+-    #endif
+-
+-  
+-
+-    // DMRS params for this ap
+-    get_Wt(Wt, ap, dmrs_type);
+-    get_Wf(Wf, ap, dmrs_type);
+-    delta = get_delta(ap, dmrs_type);
+-
+-    for (l=start_symbol; l<start_symbol+number_of_symbols; l++) {
+-
+-      uint16_t k = start_sc;
+-      uint16_t n = 0;
+-      uint8_t is_dmrs_sym = 0;
+-      uint8_t is_ptrs_sym = 0;
+-      uint16_t dmrs_idx = 0, ptrs_idx = 0;
+-
+-      if ((ul_dmrs_symb_pos >> l) & 0x01) {
+-        is_dmrs_sym = 1;
+-
+-        // transform precoding disabled (value 1)
+-        if (pusch_pdu->transform_precoding == 1){
+-        
+-          if (dmrs_type == pusch_dmrs_type1)
+-            dmrs_idx = (pusch_pdu->bwp_start + start_rb)*6;
+-          else
+-            dmrs_idx = (pusch_pdu->bwp_start + start_rb)*4;
+-
+-          // TODO: performance improvement, we can skip the modulation of DMRS symbols outside the bandwidth part
+-          // Perform this on gold sequence, not required when SC FDMA operation is done,
+-	        LOG_D(PHY,"DMRS in symbol %d\n",l);
+-          nr_modulation(pusch_dmrs[l][0], n_dmrs*2, DMRS_MOD_ORDER, mod_dmrs); // currently only codeword 0 is modulated. Qm = 2 as DMRS is QPSK modulated
+-        
+-        } else {
+-          dmrs_idx = 0;
+-        }
+-       
+-       
+-      } else if (pusch_pdu->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
+-
+-        AssertFatal(pusch_pdu->transform_precoding == 1, "PTRS NOT SUPPORTED IF TRANSFORM PRECODING IS ENABLED\n");
++      #endif
+ 
+-        if(is_ptrs_symbol(l, ulsch_ue->ptrs_symbols)) {
+-          is_ptrs_sym = 1;
+-          nr_modulation(pusch_dmrs[l][0], nb_rb, DMRS_MOD_ORDER, mod_ptrs);
++      // DMRS params for this ap
++      get_Wt(Wt, ap, dmrs_type);
++      get_Wf(Wf, ap, dmrs_type);
++      delta = get_delta(ap, dmrs_type);
++
++      for (l=start_symbol; l<start_symbol+number_of_symbols; l++) 
++      {
++        uint16_t k = start_sc;
++        uint16_t n = 0;
++        uint8_t is_dmrs_sym = 0;
++        uint8_t is_ptrs_sym = 0;
++        uint16_t dmrs_idx = 0, ptrs_idx = 0;
++
++        if ((ul_dmrs_symb_pos >> l) & 0x01) 
++        {
++          is_dmrs_sym = 1;
++          if (pusch_pdu->transformPrecoder == transformPrecoder_disabled)
++          {         
++            if (dmrs_type == pusch_dmrs_type1)
++              dmrs_idx = (pusch_pdu->bwp_start + start_rb)*6;
++            else
++              dmrs_idx = (pusch_pdu->bwp_start + start_rb)*4;
++
++            // TODO: performance improvement, we can skip the modulation of DMRS symbols outside the bandwidth part
++            // Perform this on gold sequence, not required when SC FDMA operation is done,
++	          LOG_D(PHY,"DMRS in symbol %d\n",l);
++              nr_modulation(pusch_dmrs[l][0], n_dmrs*2, DMRS_MOD_ORDER, mod_dmrs); // currently only codeword 0 is modulated. Qm = 2 as DMRS is QPSK modulated        
++          }
++          else 
++          {
++            dmrs_idx = 0;
++          }   
++        } 
++        else if (pusch_pdu->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) 
++        {       
++          AssertFatal(pusch_pdu->transformPrecoder == transformPrecoder_disabled, "PTRS NOT SUPPORTED IF TRANSFORM PRECODING IS ENABLED\n"); 
++
++          if(is_ptrs_symbol(l, ulsch_ue->ptrs_symbols)) 
++          {
++            is_ptrs_sym = 1;
++            nr_modulation(pusch_dmrs[l][0], nb_rb, DMRS_MOD_ORDER, mod_ptrs);
++          }
+         }
+-      }
+ 
+-      for (i=0; i< nb_rb*NR_NB_SC_PER_RB; i++) {
++        for (i=0; i< nb_rb*NR_NB_SC_PER_RB; i++) 
++        {
++          uint8_t is_dmrs = 0;
++          uint8_t is_ptrs = 0;
+ 
+-        uint8_t is_dmrs = 0;
+-        uint8_t is_ptrs = 0;
++          sample_offsetF = l*frame_parms->ofdm_symbol_size + k;
+ 
+-        sample_offsetF = l*frame_parms->ofdm_symbol_size + k;
+-
+-        if (is_dmrs_sym) {
+-          if (k == ((start_sc+get_dmrs_freq_idx_ul(n, k_prime, delta, dmrs_type))%frame_parms->ofdm_symbol_size))
+-            is_dmrs = 1;
+-        } else if (is_ptrs_sym) {
++          if (is_dmrs_sym) 
++          {
++            if (k == ((start_sc+get_dmrs_freq_idx_ul(n, k_prime, delta, dmrs_type))%frame_parms->ofdm_symbol_size))
++              is_dmrs = 1;
++          } 
++          else if (is_ptrs_sym) 
++          {
+             is_ptrs = is_ptrs_subcarrier(k,
+-                                         rnti,
+-                                         ap,
+-                                         dmrs_type,
+-                                         K_ptrs,
+-                                         nb_rb,
+-                                         pusch_pdu->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset,
+-                                         start_sc,
+-                                         frame_parms->ofdm_symbol_size);
+-        }
+-
+-        if (is_dmrs == 1) {
+-          // if transform precoding is enabled
+-          if (pusch_pdu->transform_precoding == 0) {
+-          
+-            ((int16_t*)txdataF[ap])[(sample_offsetF)<<1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*dmrs_seq[2*dmrs_idx]) >> 15;
+-            ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*dmrs_seq[(2*dmrs_idx) + 1]) >> 15;
++                                        rnti,
++                                        ap,
++                                        dmrs_type,
++                                        K_ptrs,
++                                        nb_rb,
++                                        pusch_pdu->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset,
++                                        start_sc,
++                                        frame_parms->ofdm_symbol_size);
++          }
++
++          if (is_dmrs == 1) 
++          {
++            if (pusch_pdu->transformPrecoder == transformPrecoder_enabled) 
++            {          
++              ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*dmrs_seq[2*dmrs_idx]) >> 15;
++              ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*dmrs_seq[(2*dmrs_idx) + 1]) >> 15;
++            } 
++            else 
++            {
++                ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*mod_dmrs[dmrs_idx<<1]) >> 15;
++                ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*mod_dmrs[(dmrs_idx<<1) + 1]) >> 15;
++            }
++            
++            #ifdef DEBUG_PUSCH_MAPPING
++            printf("DMRS: Layer: %d\t, dmrs_idx %d\t l %d \t k %d \t k_prime %d \t n %d \t dmrs: %d %d\n",
++              ap, dmrs_idx, l, k, k_prime, n, ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1],
++              ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1]);
++            #endif
++            
++            dmrs_idx++;
++            k_prime++;
++            k_prime&=1;
++            n+=(k_prime)?0:1;
++          }  
++          else if (is_ptrs == 1) 
++          {
++            ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1] = (beta_ptrs*AMP*mod_ptrs[ptrs_idx<<1]) >> 15;
++            ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = (beta_ptrs*AMP*mod_ptrs[(ptrs_idx<<1) + 1]) >> 15;
++
++            ptrs_idx++;
++          } 
++          else if (!is_dmrs_sym || allowed_xlsch_re_in_dmrs_symbol(k, start_sc, frame_parms->ofdm_symbol_size, cdm_grps_no_data, dmrs_type)) 
++          {
++            if (pusch_pdu->transformPrecoder == transformPrecoder_disabled)
++            { 
++              ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1]       = ((int16_t *)tx_layers[ap])[m<<1];
++              ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = ((int16_t *)tx_layers[ap])[(m<<1) + 1];
++            }
++            else
++            {
++            ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1]       = ((int16_t *) ulsch_ue->y)[m<<1];
++            ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = ((int16_t *) ulsch_ue->y)[(m<<1) + 1];
++            }
++            
++            #ifdef DEBUG_PUSCH_MAPPING
++            printf("DATA: layer %d\t m %d\t l %d \t k %d \t txdataF: %d %d\n",
++            ap, m, l, k, ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1],
++            ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1]);          
++            #endif
++
++            m++;
++          } 
++          else 
++          {
++            ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1]       = 0;
++            ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = 0;
++          }
++
++          if (++k >= frame_parms->ofdm_symbol_size)
++          {
++            k -= frame_parms->ofdm_symbol_size;
++          }
++        } //for (i=0; i< nb_rb*NR_NB_SC_PER_RB; i++) 
++      }//for (l=start_symbol; l<start_symbol+number_of_symbols; l++)
++    }//for (ap=0; ap< Nl; ap++)
++
++    /////////////////////////ULSCH precoding/////////////////////////
++    ///////////
++    ///Layer Precoding and Antenna port mapping
++    // tx_layers 0-3 are mapped on antenna ports
++    // The precoding info is supported by nfapi such as num_prgs, prg_size, prgs_list and pm_idx
++    // The same precoding matrix is applied on prg_size RBs, Thus
++    //        pmi = prgs_list[rbidx/prg_size].pm_idx, rbidx =0,...,rbSize-1
++    // The Precoding matrix:
++    for (int ap=0; ap<frame_parms->nb_antennas_tx; ap++) 
++    {
++      for (int l=start_symbol; l<start_symbol+number_of_symbols; l++) 
++      {
++        uint16_t k = start_sc;
++
++        for (int rb=0; rb<nb_rb; rb++) 
++        {
++          //get pmi info
++          uint8_t pmi=pusch_pdu->Tpmi;
+           
+-          } else {
+-
+-              ((int16_t*)txdataF[ap])[(sample_offsetF)<<1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*mod_dmrs[dmrs_idx<<1]) >> 15;
+-              ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*mod_dmrs[(dmrs_idx<<1) + 1]) >> 15;
+-
++          if (pmi == 0) {//unitary Precoding
++            if(ap< pusch_pdu->nrOfLayers)
++              memcpy((void*)&txdataF[ap][l*frame_parms->ofdm_symbol_size  + k],
++                     (void*)&tx_precoding[ap][2*(l*frame_parms->ofdm_symbol_size + k)],
++                     NR_NB_SC_PER_RB*sizeof(int32_t));
++            else
++              memset((void*)&txdataF[ap][l*frame_parms->ofdm_symbol_size + k],
++                     0,
++                     NR_NB_SC_PER_RB*sizeof(int32_t));
++         
++            k += NR_NB_SC_PER_RB;
++            if (k >= frame_parms->ofdm_symbol_size) {
++              k -= frame_parms->ofdm_symbol_size;
++            }
++          }
++          else
++          {
++            //get the precoding matrix weights:
++            char *W_prec;
++            switch (frame_parms->nb_antennas_tx) 
++            {
++              case 1://1 antenna port
++                W_prec = nr_W_1l_2p[pmi][ap];
++                break;
++              case 2://2 antenna ports
++                if (pusch_pdu->nrOfLayers == 1)//1 layer
++                  W_prec = nr_W_1l_2p[pmi][ap];
++                else//2 layers
++                  W_prec = nr_W_2l_2p[pmi][ap];
++                break;
++              case 4://4 antenna ports
++                if (pusch_pdu->nrOfLayers == 1)//1 layer
++                  W_prec = nr_W_1l_4p[pmi][ap];
++                else if (pusch_pdu->nrOfLayers == 2)//2 layers
++                  W_prec = nr_W_2l_4p[pmi][ap];
++                else if (pusch_pdu->nrOfLayers == 3)//3 layers
++                  W_prec = nr_W_3l_4p[pmi][ap];
++                else//4 layers
++                  W_prec = nr_W_4l_4p[pmi][ap];
++                break;
++              default:
++                LOG_D(PHY,"Precoding 1,2, or 4 antenna ports are currently supported\n");
++                W_prec = nr_W_1l_2p[pmi][ap];
++                break;
+             }
+ 
+-          #ifdef DEBUG_PUSCH_MAPPING
+-            printf("dmrs_idx %d\t l %d \t k %d \t k_prime %d \t n %d \t dmrs: %d %d\n",
+-            dmrs_idx, l, k, k_prime, n, ((int16_t*)txdataF[ap])[(sample_offsetF)<<1],
+-            ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1]);
+-          #endif
+-
+-
+-          dmrs_idx++;
+-          k_prime++;
+-          k_prime&=1;
+-          n+=(k_prime)?0:1;
+-
+-        }  else if (is_ptrs == 1) {
+-
+-          ((int16_t*)txdataF[ap])[(sample_offsetF)<<1] = (beta_ptrs*AMP*mod_ptrs[ptrs_idx<<1]) >> 15;
+-          ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = (beta_ptrs*AMP*mod_ptrs[(ptrs_idx<<1) + 1]) >> 15;
+-
+-          ptrs_idx++;
+-
+-        } else if (!is_dmrs_sym || allowed_xlsch_re_in_dmrs_symbol(k, start_sc, frame_parms->ofdm_symbol_size, cdm_grps_no_data, dmrs_type)) {
+-
+-          ((int16_t*)txdataF[ap])[(sample_offsetF)<<1]       = ((int16_t *) ulsch_ue->y)[m<<1];
+-          ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = ((int16_t *) ulsch_ue->y)[(m<<1) + 1];
+-
+-        #ifdef DEBUG_PUSCH_MAPPING
+-          printf("m %d\t l %d \t k %d \t txdataF: %d %d\n",
+-          m, l, k, ((int16_t*)txdataF[ap])[(sample_offsetF)<<1],
+-          ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1]);
+-        #endif
+-
+-          m++;
+-
+-        } else {
+-
+-          ((int16_t*)txdataF[ap])[(sample_offsetF)<<1]       = 0;
+-          ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = 0;
+-
+-        }
++            for (int i=0; i<NR_NB_SC_PER_RB; i++) 
++            {
++              int32_t re_offset = l*frame_parms->ofdm_symbol_size + k;
++              int32_t precodatatx_F = nr_layer_precoder(tx_precoding, W_prec, pusch_pdu->nrOfLayers, re_offset);
++              ((int16_t*)txdataF[ap])[(re_offset<<1)] = ((int16_t *) &precodatatx_F)[0];
++              ((int16_t*)txdataF[ap])[(re_offset<<1) + 1] = ((int16_t *) &precodatatx_F)[1];
++                            
++              if (++k >= frame_parms->ofdm_symbol_size) 
++              {
++                k -= frame_parms->ofdm_symbol_size;
++              }
++            }
++          }
++        } //RB loop
++      } // symbol loop
++    }// port loop
+ 
+-        if (++k >= frame_parms->ofdm_symbol_size)
+-          k -= frame_parms->ofdm_symbol_size;
+-      }
+-    }
+-  }
+-  }
++  } //for (cwd_index = 0;cwd_index < num_of_codewords; cwd_index++)
+ 
+   NR_UL_UE_HARQ_t *harq_process_ulsch=NULL;
+   harq_process_ulsch = UE->ulsch[thread_id][gNB_id][0]->harq_processes[harq_pid];
+diff --git a/openair1/PHY/defs_gNB.h b/openair1/PHY/defs_gNB.h
+index 11b5112a6c..600ba07cb3 100644
+--- a/openair1/PHY/defs_gNB.h
++++ b/openair1/PHY/defs_gNB.h
+@@ -479,7 +479,7 @@ typedef struct {
+   /// \brief Cross-correlation of two UE signals.
+   /// - first index: rx antenna [0..nb_antennas_rx[
+   /// - second index: symbol [0..]
+-  int32_t **rho;
++  int32_t ***rho;
+   /// \f$\log_2(\max|H_i|^2)\f$
+   int16_t log2_maxh;
+   /// \brief Magnitude of Uplink Channel first layer (16QAM level/First 64QAM level).
+@@ -509,6 +509,10 @@ typedef struct {
+   /// \brief llr values.
+   /// - first index: ? [0..1179743] (hard coded)
+   int16_t *llr;
++  /// \brief llr values per layer.
++  /// - first index: ? [0..3] (hard coded)
++  /// - first index: ? [0..1179743] (hard coded)
++  int16_t **llr_layers;
+   /// DMRS symbol index, to be updated every DMRS symbol within a slot.
+   uint8_t dmrs_symbol;
+   // PTRS symbol index, to be updated every PTRS symbol within a slot.
+diff --git a/openair1/PHY/defs_nr_UE.h b/openair1/PHY/defs_nr_UE.h
+index 69978f5548..02988b590a 100644
+--- a/openair1/PHY/defs_nr_UE.h
++++ b/openair1/PHY/defs_nr_UE.h
+@@ -229,6 +229,9 @@ typedef struct {
+ typedef struct {
+   /// TX buffers for multiple layers
+   int32_t *txdataF_layers[NR_MAX_NB_LAYERS];
++
++  /// TX buffers for UE-spec transmission
++  int32_t *txdataF_precoding[NR_MAX_NB_LAYERS];
+   } NR_UE_PUSCH;
+ 
+ typedef struct {
+diff --git a/openair1/PHY/defs_nr_common.h b/openair1/PHY/defs_nr_common.h
+index c385d323e3..fbe094f41c 100644
+--- a/openair1/PHY/defs_nr_common.h
++++ b/openair1/PHY/defs_nr_common.h
+@@ -93,7 +93,7 @@
+ #define NR_MAX_CSET_DURATION 3
+ 
+ #define NR_MAX_NB_RBG 18
+-#define NR_MAX_NB_LAYERS 2 // 8 // SU-MIMO (3GPP TS 38.211 V15.4.0 section 7.3.1.3)
++#define NR_MAX_NB_LAYERS 4 // 8 // SU-MIMO (3GPP TS 38.211 V15.4.0 section 7.3.1.3)
+ #define NR_MAX_NB_CODEWORDS 2
+ #define NR_MAX_NB_HARQ_PROCESSES 16
+ #define NR_MAX_PDSCH_ENCODED_LENGTH (NR_MAX_NB_RB*NR_SYMBOLS_PER_SLOT*NR_NB_SC_PER_RB*8*NR_MAX_NB_LAYERS) // 8 is the maximum modulation order (it was 950984 before !!)
+@@ -104,11 +104,11 @@
+ #define MAX_NUM_NR_DLSCH_SEGMENTS (NR_MAX_NB_LAYERS*34)
+ #define MAX_NR_DLSCH_PAYLOAD_BYTES (MAX_NUM_NR_DLSCH_SEGMENTS*1056)
+ 
+-#define MAX_NUM_NR_ULSCH_SEGMENTS 34
++#define MAX_NUM_NR_ULSCH_SEGMENTS (NR_MAX_NB_LAYERS*34)
+ #define MAX_NR_ULSCH_PAYLOAD_BYTES (MAX_NUM_NR_ULSCH_SEGMENTS*1056)
+ 
+-#define MAX_NUM_NR_CHANNEL_BITS (14*273*12*8)  // 14 symbols, 273 RB
+-#define MAX_NUM_NR_RE (14*273*12)
++#define MAX_NUM_NR_CHANNEL_BITS (4*14*273*12*8)  // 14 symbols, 273 RB
++#define MAX_NUM_NR_RE (4*14*273*12)
+ #define NR_RX_NB_TH 1
+ #define NR_NB_TH_SLOT 2
+ 
+diff --git a/openair1/SCHED_NR/nr_ru_procedures.c b/openair1/SCHED_NR/nr_ru_procedures.c
+index d8fa11d157..640a0740ef 100644
+--- a/openair1/SCHED_NR/nr_ru_procedures.c
++++ b/openair1/SCHED_NR/nr_ru_procedures.c
+@@ -526,7 +526,8 @@ void nr_fep0(RU_t *ru, int first_half) {
+     end_symbol = NR_SYMBOLS_PER_SLOT;
+   }
+ 
+-  LOG_D(PHY,"In fep0 for slot = %d, first_half = %d, start_symbol = %d, end_symbol = %d\n", proc->tti_rx, first_half, start_symbol, end_symbol);
++  LOG_D(PHY,"In fep0 for slot = %d, first_half = %d, start_symbol = %d, end_symbol = %d, nb_antennas_rx = %d,N_TA_offset = %d\n", 
++        proc->tti_rx, first_half, start_symbol, end_symbol,fp->nb_antennas_rx,ru->N_TA_offset);
+   //  printf("fep0: slot %d\n",slot);
+ 
+   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPRX+proc->tti_rx, 1);
+diff --git a/openair1/SCHED_NR/phy_procedures_nr_gNB.c b/openair1/SCHED_NR/phy_procedures_nr_gNB.c
+index 1685e206ae..63c708e565 100644
+--- a/openair1/SCHED_NR/phy_procedures_nr_gNB.c
++++ b/openair1/SCHED_NR/phy_procedures_nr_gNB.c
+@@ -253,13 +253,13 @@ void nr_postDecode(PHY_VARS_gNB *gNB, notifiedFIFO_elt_t *req) {
+   // if all segments are done
+   if (rdata->nbSegments == ulsch_harq->processedSegments) {
+     if (decodeSuccess) {
+-      LOG_D(PHY,"[gNB %d] ULSCH: Setting ACK for SFN/SF %d.%d (pid %d, ndi %d, status %d, round %d, TBS %d, Max interation (all seg) %d)\n",
++      LOG_I(PHY,"[gNB %d] ULSCH: Setting ACK for SFN/SF %d.%d (pid %d, ndi %d, status %d, round %d, TBS %d, Max interation (all seg) %d)\n",
+             gNB->Mod_id,ulsch_harq->frame,ulsch_harq->slot,rdata->harq_pid,pusch_pdu->pusch_data.new_data_indicator,ulsch_harq->status,ulsch_harq->round,ulsch_harq->TBS,rdata->decodeIterations);
+       ulsch_harq->status = SCH_IDLE;
+       ulsch_harq->round  = 0;
+       ulsch->harq_mask &= ~(1 << rdata->harq_pid);
+ 
+-      LOG_D(PHY, "ULSCH received ok \n");
++      LOG_I(PHY, "ULSCH received ok \n");
+       nr_fill_indication(gNB,ulsch_harq->frame, ulsch_harq->slot, rdata->ulsch_id, rdata->harq_pid, 0,0);
+       //dumpsig=1;
+     } else {
+@@ -281,7 +281,7 @@ void nr_postDecode(PHY_VARS_gNB *gNB, notifiedFIFO_elt_t *req) {
+       }
+       ulsch_harq->handled  = 1;
+ 
+-      LOG_D(PHY, "ULSCH %d in error\n",rdata->ulsch_id);
++      LOG_I(PHY, "ULSCH %d in error\n",rdata->ulsch_id);
+       nr_fill_indication(gNB,ulsch_harq->frame, ulsch_harq->slot, rdata->ulsch_id, rdata->harq_pid, 1,0);
+     }
+ /*
+@@ -365,6 +365,14 @@ void nr_ulsch_procedures(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx, int ULSCH
+ 	number_dmrs_symbols, // number of dmrs symbols irrespective of single or double symbol dmrs
+ 	pusch_pdu->qam_mod_order,
+ 	pusch_pdu->nrOfLayers);
++
++
++  nr_ulsch_layer_demapping(gNB->pusch_vars[ULSCH_id]->llr,
++				     pusch_pdu->nrOfLayers,
++				     pusch_pdu->qam_mod_order,
++				     G,
++				     gNB->pusch_vars[ULSCH_id]->llr_layers);
++             
+   //----------------------------------------------------------
+   //------------------- ULSCH unscrambling -------------------
+   //----------------------------------------------------------
+@@ -731,7 +739,7 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
+             (ulsch_harq->slot == slot_rx) &&
+             (ulsch_harq->handled == 0)){
+ 
+-          LOG_D(PHY, "PUSCH detection started in frame %d slot %d\n",
++          LOG_I(PHY, "PUSCH detection started in frame %d slot %d\n",
+                 frame_rx,slot_rx);
+           int num_dmrs=0;
+           for (int s=0;s<NR_NUMBER_OF_SYMBOLS_PER_SLOT; s++)
+@@ -767,7 +775,7 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
+ 	        start_meas(&gNB->rx_pusch_stats);
+           no_sig = nr_rx_pusch(gNB, ULSCH_id, frame_rx, slot_rx, harq_pid);
+           if (no_sig) {
+-            LOG_D(PHY, "PUSCH not detected in frame %d, slot %d\n", frame_rx, slot_rx);
++            LOG_I(PHY, "PUSCH not detected in frame %d, slot %d\n", frame_rx, slot_rx);
+             nr_fill_indication(gNB, frame_rx, slot_rx, ULSCH_id, harq_pid, 1,1);
+             return 1;
+           }
+@@ -783,7 +791,7 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
+               dB_fixed_x10(gNB->pusch_vars[ULSCH_id]->ulsch_noise_power_tot) + gNB->pusch_thres) {
+              NR_gNB_SCH_STATS_t *stats=get_ulsch_stats(gNB,ulsch);
+ 
+-             LOG_D(PHY, "PUSCH not detected in %d.%d (%d,%d,%d)\n",frame_rx,slot_rx,
++             LOG_I(PHY, "PUSCH not detected in %d.%d (%d,%d,%d)\n",frame_rx,slot_rx,
+                    dB_fixed_x10(gNB->pusch_vars[ULSCH_id]->ulsch_power_tot),
+                    dB_fixed_x10(gNB->pusch_vars[ULSCH_id]->ulsch_noise_power_tot),gNB->pusch_thres);
+              gNB->pusch_vars[ULSCH_id]->ulsch_power_tot = gNB->pusch_vars[ULSCH_id]->ulsch_noise_power_tot;
+diff --git a/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c b/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
+index f6e4d90bee..8e1df435c3 100644
+--- a/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
++++ b/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
+@@ -157,7 +157,7 @@ int8_t nr_ue_scheduled_response(nr_scheduled_response_t *scheduled_response){
+       for (i = 0; i < ul_config->number_pdus; ++i){
+ 
+         AssertFatal(ul_config->ul_config_list[i].pdu_type <= FAPI_NR_UL_CONFIG_TYPES,"pdu_type %d out of bounds\n",ul_config->ul_config_list[i].pdu_type);
+-        LOG_D(PHY, "In %s: processing %s PDU of %d total UL PDUs (ul_config %p) \n", __FUNCTION__, ul_pdu_type[ul_config->ul_config_list[i].pdu_type - 1], ul_config->number_pdus, ul_config);
++        LOG_I(PHY, "In %s i %d: processing %s PDU of %d total UL PDUs (ul_config %p) \n", __FUNCTION__, i, ul_pdu_type[ul_config->ul_config_list[i].pdu_type - 1], ul_config->number_pdus, ul_config);
+ 
+         uint8_t pdu_type = ul_config->ul_config_list[i].pdu_type, current_harq_pid, gNB_id = 0;
+         /* PRACH */
+@@ -180,6 +180,8 @@ int8_t nr_ue_scheduled_response(nr_scheduled_response_t *scheduled_response){
+           if (harq_process_ul_ue){
+ 
+             nfapi_nr_ue_pusch_pdu_t *pusch_pdu = &harq_process_ul_ue->pusch_pdu;
++            
++            LOG_I(PHY, "In %s i %d: copy pusch_config_pdu nrOfLayers:%d, num_dmrs_cdm_grps_no_data:%d \n", __FUNCTION__, i, pusch_config_pdu->nrOfLayers,pusch_config_pdu->num_dmrs_cdm_grps_no_data);
+ 
+             memcpy(pusch_pdu, pusch_config_pdu, sizeof(nfapi_nr_ue_pusch_pdu_t));
+ 
+diff --git a/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c b/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
+index 940931694c..0a0a9cc2f9 100644
+--- a/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
++++ b/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
+@@ -270,9 +270,12 @@ void phy_procedures_nrUE_TX(PHY_VARS_NR_UE *ue,
+   AssertFatal(ue->CC_id == 0, "Transmission on secondary CCs is not supported yet\n");
+ 
+   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_TX,VCD_FUNCTION_IN);
+-
+-  memset(ue->common_vars.txdataF[0], 0, sizeof(int)*14*ue->frame_parms.ofdm_symbol_size);
+-
++  
++  for(int i=0; i< ue->frame_parms.nb_antennas_tx; ++i)
++  {
++    memset(ue->common_vars.txdataF[i], 0, sizeof(int)*14*ue->frame_parms.ofdm_symbol_size);
++  }
++  
+   LOG_D(PHY,"****** start TX-Chain for AbsSubframe %d.%d ******\n", frame_tx, slot_tx);
+ 
+ 
+diff --git a/openair1/SIMULATION/NR_PHY/dlsim.c b/openair1/SIMULATION/NR_PHY/dlsim.c
+index 1eb6027936..d5a1054654 100644
+--- a/openair1/SIMULATION/NR_PHY/dlsim.c
++++ b/openair1/SIMULATION/NR_PHY/dlsim.c
+@@ -768,7 +768,7 @@ int main(int argc, char **argv)
+ 
+   prepare_scd(scd);
+ 
+-  fill_default_secondaryCellGroup(scc, scd, secondaryCellGroup, 0, 1, n_tx, 0, 0, 0);
++  fill_default_secondaryCellGroup(scc, scd, secondaryCellGroup, 0, 1, n_tx, n_rx, 0, 0, 0);
+ 
+   /* RRC parameter validation for secondaryCellGroup */
+   fix_scd(scd);
+diff --git a/openair1/SIMULATION/NR_PHY/ulschsim.c b/openair1/SIMULATION/NR_PHY/ulschsim.c
+index 94aea36a67..fb8b4295ae 100644
+--- a/openair1/SIMULATION/NR_PHY/ulschsim.c
++++ b/openair1/SIMULATION/NR_PHY/ulschsim.c
+@@ -137,6 +137,7 @@ int main(int argc, char **argv)
+   uint16_t nb_symb_sch = 12;
+   uint16_t nb_rb = 50;
+   uint8_t Imcs = 9;
++  uint8_t Nl = 1;
+ 
+   double DS_TDL = .03;
+ 
+@@ -150,7 +151,7 @@ int main(int argc, char **argv)
+   randominit(0);
+ 
+   //while ((c = getopt(argc, argv, "df:hpg:i:j:n:l:m:r:s:S:y:z:M:N:F:R:P:")) != -1) {
+-  while ((c = getopt(argc, argv, "hg:n:s:S:py:z:M:N:R:F:m:l:r:")) != -1) {
++  while ((c = getopt(argc, argv, "hg:n:s:S:py:z:M:N:R:F:m:l:r:W:")) != -1) {
+     switch (c) {
+       /*case 'f':
+          write_output_file = 1;
+@@ -251,7 +252,7 @@ int main(int argc, char **argv)
+       case 'y':
+         n_tx = atoi(optarg);
+ 
+-        if ((n_tx == 0) || (n_tx > 2)) {
++        if ((n_tx == 0) || (n_tx > 4)) {
+           printf("Unsupported number of TX antennas %d. Exiting.\n", n_tx);
+           exit(-1);
+         }
+@@ -261,7 +262,7 @@ int main(int argc, char **argv)
+       case 'z':
+         n_rx = atoi(optarg);
+ 
+-        if ((n_rx == 0) || (n_rx > 2)) {
++        if ((n_rx == 0) || (n_rx > 4)) {
+           printf("Unsupported number of RX antennas %d. Exiting.\n", n_rx);
+           exit(-1);
+         }
+@@ -299,6 +300,10 @@ int main(int argc, char **argv)
+           printf("Illegal PBCH phase (0-3) got %d\n", pbch_phase);
+         break;*/
+ 
++      case 'W':
++        Nl = atoi(optarg);
++      break;
++
+       case 'm':
+         Imcs = atoi(optarg);
+ #ifdef DEBUG_NR_ULSCHSIM
+@@ -335,13 +340,14 @@ int main(int argc, char **argv)
+           printf("-z Number of RX antennas used in UE\n");
+           //printf("-i Relative strength of first intefering eNB (in dB) - cell_id mod 3 = 1\n");
+           //printf("-j Relative strength of second intefering eNB (in dB) - cell_id mod 3 = 2\n");
++          printf("-W number of layer\n");
+           printf("-M Multiple SSB positions in burst\n");
+           printf("-N Nid_cell\n");
+           printf("-R N_RB_UL\n");
+           printf("-F Input filename (.txt format) for RX conformance testing\n");
+-          printf("-m\n");
+-          printf("-l\n");
+-          printf("-r\n");
++          printf("-m MCS\n");
++          printf("-l number of symbol\n");
++          printf("-r number of RB\n");
+           //printf("-O oversampling factor (1,2,4,8,16)\n");
+           //printf("-A Interpolation_filname Run with Abstraction to generate Scatter plot using interpolation polynomial in file\n");
+           //printf("-C Generate Calibration information for Abstraction (effective SNR adjustment to remove Pe bias w.r.t. AWGN)\n");
+@@ -359,8 +365,8 @@ int main(int argc, char **argv)
+     snr1 = snr0 + 10;
+ 
+   gNB2UE = new_channel_desc_scm(n_tx,
+-		                n_rx,
+-				channel_model,
++                                n_rx,
++                                channel_model,
+                                 61.44e6, //N_RB2sampling_rate(N_RB_DL),
+                                 40e6, //N_RB2channel_bandwidth(N_RB_DL),
+                                 DS_TDL,
+@@ -426,8 +432,7 @@ int main(int argc, char **argv)
+   uint8_t length_dmrs = 1;
+   uint8_t N_PRB_oh;
+   uint16_t N_RE_prime,code_rate;
+-  unsigned char mod_order;
+-  uint8_t Nl = 1;
++  unsigned char mod_order;  
+   uint8_t rvidx = 0;
+   uint8_t UE_id = 0;
+ 
+@@ -437,9 +442,14 @@ int main(int argc, char **argv)
+ 
+   NR_UE_ULSCH_t *ulsch_ue = UE->ulsch[0][0][0];
+ 
++  if ((Nl==4)||(Nl==3))
++  {
++    nb_re_dmrs = nb_re_dmrs*2;
++  }
++
+   mod_order = nr_get_Qm_ul(Imcs, 0);
+   code_rate = nr_get_code_rate_ul(Imcs, 0);
+-  available_bits = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, mod_order, 1);
++  available_bits = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, mod_order, Nl);
+   TBS = nr_compute_tbs(mod_order,code_rate, nb_rb, nb_symb_sch, nb_re_dmrs*length_dmrs, 0, 0, Nl);
+ 
+   printf("\nAvailable bits %u TBS %u mod_order %d\n", available_bits, TBS, mod_order);
+diff --git a/openair1/SIMULATION/NR_PHY/ulsim.c b/openair1/SIMULATION/NR_PHY/ulsim.c
+index 69316d6b88..b6cfae3fe1 100644
+--- a/openair1/SIMULATION/NR_PHY/ulsim.c
++++ b/openair1/SIMULATION/NR_PHY/ulsim.c
+@@ -282,7 +282,7 @@ int main(int argc, char **argv)
+   int gNB_id = 0;
+   int ap;
+   int tx_offset;
+-  int32_t txlev=0;
++  int32_t txlev_sum = 0, atxlev[4];
+   int start_rb = 0;
+   int UE_id =0; // [hna] only works for UE_id = 0 because NUMBER_OF_NR_UE_MAX is set to 1 (phy_init_nr_gNB causes segmentation fault)
+   float target_error_rate = 0.01;
+@@ -307,7 +307,7 @@ int main(int argc, char **argv)
+   uint16_t ptrsSymbPerSlot = 0;
+   uint16_t ptrsRePerSymb = 0;
+ 
+-  uint8_t transform_precoding = 1; // 0 - ENABLE, 1 - DISABLE
++  uint8_t transformPrecoder = transformPrecoder_disabled; // 0 - ENABLE, 1 - DISABLE
+   uint8_t num_dmrs_cdm_grps_no_data = 1;
+   uint8_t mcs_table = 0;
+ 
+@@ -329,7 +329,7 @@ int main(int argc, char **argv)
+   /* initialize the sin-cos table */
+    InitSinLUT();
+ 
+-  while ((c = getopt(argc, argv, "a:b:c:d:ef:g:h:ikl:m:n:p:r:s:u:w:y:z:F:G:H:M:N:PR:S:T:U:L:Z")) != -1) {
++  while ((c = getopt(argc, argv, "a:b:c:d:ef:g:h:i:kl:m:n:p:r:s:u:w:y:z:F:G:H:M:N:PR:S:T:U:L:Z:W:")) != -1) {
+     printf("handling optarg %c\n",c);
+     switch (c) {
+ 
+@@ -363,8 +363,8 @@ int main(int argc, char **argv)
+       scg_fd = fopen(optarg, "r");
+       
+       if (scg_fd == NULL) {
+-	printf("Error opening %s\n", optarg);
+-	exit(-1);
++        printf("Error opening %s\n", optarg);
++        exit(-1);
+       }
+ 
+       break;
+@@ -437,6 +437,10 @@ int main(int argc, char **argv)
+     case 'm':
+       Imcs = atoi(optarg);
+       break;
++
++    case 'W':
++      precod_nbr_layers = atoi(optarg);
++      break;
+       
+     case 'n':
+       n_trials = atoi(optarg);
+@@ -485,9 +489,9 @@ int main(int argc, char **argv)
+     case 'y':
+       n_tx = atoi(optarg);
+       
+-      if ((n_tx == 0) || (n_tx > 2)) {
+-	printf("Unsupported number of tx antennas %d\n", n_tx);
+-	exit(-1);
++      if ((n_tx == 0) || (n_tx > 4)) {
++          printf("Unsupported number of tx antennas %d\n", n_tx);
++          exit(-1);
+       }
+       
+       break;
+@@ -496,8 +500,8 @@ int main(int argc, char **argv)
+       n_rx = atoi(optarg);
+       
+       if ((n_rx == 0) || (n_rx > 8)) {
+-	printf("Unsupported number of rx antennas %d\n", n_rx);
+-	exit(-1);
++          printf("Unsupported number of rx antennas %d\n", n_rx);
++          exit(-1);
+       }
+       
+       break;
+@@ -506,8 +510,8 @@ int main(int argc, char **argv)
+       input_fd = fopen(optarg, "r");
+       
+       if (input_fd == NULL) {
+-	printf("Problem with filename %s\n", optarg);
+-	exit(-1);
++          printf("Problem with filename %s\n", optarg);
++          exit(-1);
+       }
+       
+       break;
+@@ -568,11 +572,11 @@ int main(int argc, char **argv)
+ 
+     case 'Z':
+ 
+-      transform_precoding = 0; // enabled
++      transformPrecoder = transformPrecoder_enabled; 
+       num_dmrs_cdm_grps_no_data = 2;
+       mcs_table = 3;
+       
+-      printf("NOTE: TRANSFORM PRECODING (SC-FDMA) is ENABLED in UPLINK (0 - ENABLE, 1 - DISABLE) : %d \n",  transform_precoding);
++      printf("NOTE: TRANSFORM PRECODING (SC-FDMA) is ENABLED in UPLINK (0 - ENABLE, 1 - DISABLE) : %d \n",  transformPrecoder);
+ 
+       break;
+ 
+@@ -611,6 +615,7 @@ int main(int argc, char **argv)
+       printf("-U Change DMRS Config, arguments list DMRS TYPE{0=A,1=B} DMRS AddPos{0:3}, e.g. -U 2 0 2 \n");
+       printf("-Q If -F used, read parameters from file\n");
+       printf("-Z If -Z is used, SC-FDMA or transform precoding is enabled in Uplink \n");
++	  printf("-W Num of layer for PUSCH\n");
+       exit(-1);
+       break;
+ 
+@@ -713,7 +718,7 @@ int main(int argc, char **argv)
+ 
+   prepare_scd(scd);
+ 
+-  fill_default_secondaryCellGroup(scc, scd, secondaryCellGroup, 0, 1, n_tx, 0, 0, 0);
++  fill_default_secondaryCellGroup(scc, scd, secondaryCellGroup, 0, 1, n_tx, n_rx, 0, 0, 0);
+ 
+   // xer_fprint(stdout, &asn_DEF_NR_CellGroupConfig, (const void*)secondaryCellGroup);
+ 
+@@ -724,9 +729,9 @@ int main(int argc, char **argv)
+ 
+   gNB->if_inst->NR_PHY_config_req      = nr_phy_config_request;
+   // common configuration
+-  rrc_mac_config_req_gNB(0,0, n_tx, n_rx, 0, scc, &rrc.carrier.mib,0, 0, NULL);
++  rrc_mac_config_req_gNB(0,0, n_tx, n_rx, n_tx, scc, &rrc.carrier.mib,0, 0, NULL);
+   // UE dedicated configuration
+-  rrc_mac_config_req_gNB(0,0, n_tx, n_rx, 0, scc, &rrc.carrier.mib,1, secondaryCellGroup->spCellConfig->reconfigurationWithSync->newUE_Identity,secondaryCellGroup);
++  rrc_mac_config_req_gNB(0,0, n_tx, n_rx, n_tx, scc, &rrc.carrier.mib,1, secondaryCellGroup->spCellConfig->reconfigurationWithSync->newUE_Identity,secondaryCellGroup);
+   frame_parms->nb_antennas_tx = n_tx;
+   frame_parms->nb_antennas_rx = n_rx;
+   nfapi_nr_config_request_scf_t *cfg = &gNB->gNB_config;
+@@ -870,14 +875,18 @@ int main(int argc, char **argv)
+   uint16_t number_dmrs_symbols = get_dmrs_symbols_in_slot(l_prime_mask, nb_symb_sch);
+   printf("num dmrs sym %d\n",number_dmrs_symbols);
+   uint8_t  nb_re_dmrs          = (dmrs_config_type == pusch_dmrs_type1) ? 6 : 4;
+-
+-  // if transform precoding is enabled
+-  if (transform_precoding == 0) {
++  
++  if ((frame_parms->nb_antennas_tx==4)&&(precod_nbr_layers==4))
++  {
++    num_dmrs_cdm_grps_no_data = 2;
++  }
++  
++  if (transformPrecoder == transformPrecoder_enabled) {  
+ 
+     AssertFatal(enable_ptrs == 0, "PTRS NOT SUPPORTED IF TRANSFORM PRECODING IS ENABLED\n");
+ 
+     int8_t index = get_index_for_dmrs_lowpapr_seq((NR_NB_SC_PER_RB/2) * nb_rb);
+-	  AssertFatal(index >= 0, "Num RBs not configured according to 3GPP 38.211 section 6.3.1.4. For PUSCH with transform precoding, num RBs cannot be multiple of any other primenumber other than 2,3,5\n");
++	AssertFatal(index >= 0, "Num RBs not configured according to 3GPP 38.211 section 6.3.1.4. For PUSCH with transform precoding, num RBs cannot be multiple of any other primenumber other than 2,3,5\n");
+     
+     dmrs_config_type = pusch_dmrs_type1;
+ 
+@@ -886,7 +895,7 @@ int main(int argc, char **argv)
+ 
+   nb_re_dmrs   = nb_re_dmrs * num_dmrs_cdm_grps_no_data;
+ 
+-  unsigned int available_bits  = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, number_dmrs_symbols, mod_order, 1);
++  unsigned int available_bits  = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, number_dmrs_symbols, mod_order, precod_nbr_layers);
+   unsigned int TBS             = nr_compute_tbs(mod_order, code_rate, nb_rb, nb_symb_sch, nb_re_dmrs * number_dmrs_symbols, 0, 0, precod_nbr_layers);
+ 
+   
+@@ -1082,14 +1091,14 @@ int main(int argc, char **argv)
+       pusch_pdu->mcs_table = mcs_table;
+       pusch_pdu->target_code_rate = code_rate;
+       pusch_pdu->qam_mod_order = mod_order;
+-      pusch_pdu->transform_precoding = transform_precoding;
++      pusch_pdu->transformPrecoder = transformPrecoder;
+       pusch_pdu->data_scrambling_id = *scc->physCellId;
+-      pusch_pdu->nrOfLayers = 1;
++      pusch_pdu->nrOfLayers = precod_nbr_layers;
+       pusch_pdu->ul_dmrs_symb_pos = l_prime_mask;
+       pusch_pdu->dmrs_config_type = dmrs_config_type;
+       pusch_pdu->ul_dmrs_scrambling_id =  *scc->physCellId;
+       pusch_pdu->scid = 0;
+-      pusch_pdu->dmrs_ports = 1;
++      pusch_pdu->dmrs_ports = ((1<<precod_nbr_layers)-1);      
+       pusch_pdu->num_dmrs_cdm_grps_no_data = num_dmrs_cdm_grps_no_data;
+       pusch_pdu->resource_alloc = 1; 
+       pusch_pdu->rb_start = start_rb;
+@@ -1109,7 +1118,7 @@ int main(int argc, char **argv)
+       pusch_pdu->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset = 0;
+ 
+       // if transform precoding is enabled
+-      if (transform_precoding == 0) {
++      if (transformPrecoder == transformPrecoder_enabled) { 
+ 
+         pusch_pdu->dfts_ofdm.low_papr_group_number = *scc->physCellId % 30; // U as defined in 38.211 section 6.4.1.1.1.2 
+         pusch_pdu->dfts_ofdm.low_papr_sequence_number = 0;     // V as defined in 38.211 section 6.4.1.1.1.2
+@@ -1167,10 +1176,10 @@ int main(int argc, char **argv)
+       ul_config.ul_config_list[0].pusch_config_pdu.pusch_ptrs.ptrs_ports_list   = (nfapi_nr_ue_ptrs_ports_t *) malloc(2*sizeof(nfapi_nr_ue_ptrs_ports_t));
+       ul_config.ul_config_list[0].pusch_config_pdu.pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset = 0;
+ 
+-      ul_config.ul_config_list[0].pusch_config_pdu.transform_precoding = transform_precoding;
++      ul_config.ul_config_list[0].pusch_config_pdu.transformPrecoder = transformPrecoder;
+ 
+       // if transform precoding is enabled
+-      if (transform_precoding == 0) {
++      if (transformPrecoder == transformPrecoder_enabled) {
+    
+         ul_config.ul_config_list[0].pusch_config_pdu.dfts_ofdm.low_papr_group_number = *scc->physCellId % 30;// U as defined in 38.211 section 6.4.1.1.1.2 
+         ul_config.ul_config_list[0].pusch_config_pdu.dfts_ofdm.low_papr_sequence_number = 0;// V as defined in 38.211 section 6.4.1.1.1.2
+@@ -1198,28 +1207,54 @@ int main(int argc, char **argv)
+         LOG_D(PHY, "Sending Uplink data \n");
+         nr_ue_pusch_common_procedures(UE,
+                                       slot,
+-                                      &UE->frame_parms,1);
+-
+-        if (n_trials==1) {
+-          LOG_M("txsig0.m","txs0", UE->common_vars.txdata[0],frame_parms->samples_per_subframe*10,1,1);
++                                      &UE->frame_parms,
++									  frame_parms->nb_antennas_tx);
++        
++        
++        if (n_trials==1)
++        {
++          LOG_M("txsig0.m","txs0", &UE->common_vars.txdata[0][slot_offset],slot_length,1,1);
+           LOG_M("txsig0F.m","txs0F", UE->common_vars.txdataF[0],frame_parms->ofdm_symbol_size*14,1,1);
++          if (precod_nbr_layers > 1)
++          {
++            LOG_M("txsig1.m","txs1", &UE->common_vars.txdata[1][slot_offset],slot_length,1,1);
++            LOG_M("txsig1F.m","txs1F", UE->common_vars.txdataF[1],frame_parms->ofdm_symbol_size*14,1,1);
++            if (precod_nbr_layers==4)
++            {          
++              LOG_M("txsig2.m","txs2", &UE->common_vars.txdata[2][slot_offset],slot_length,1,1);
++              LOG_M("txsig3.m","txs3", &UE->common_vars.txdata[3][slot_offset],slot_length,1,1);
++
++              LOG_M("txsig2F.m","txs2F", UE->common_vars.txdataF[2],frame_parms->ofdm_symbol_size*14,1,1);
++              LOG_M("txsig3F.m","txs3F", UE->common_vars.txdataF[3],frame_parms->ofdm_symbol_size*14,1,1);
++            }
++          }
+         }
+         ///////////
+         ////////////////////////////////////////////////////
+         tx_offset = frame_parms->get_samples_slot_timestamp(slot,frame_parms,0);
+-
+-        txlev = signal_energy(&UE->common_vars.txdata[0][tx_offset + 5*frame_parms->ofdm_symbol_size + 4*frame_parms->nb_prefix_samples + frame_parms->nb_prefix_samples0],
++        txlev_sum = 0;
++        for (int aa=0; aa<frame_parms->nb_antennas_tx; aa++) 
++        {
++          atxlev[aa] = signal_energy(&UE->common_vars.txdata[aa][tx_offset + 5*frame_parms->ofdm_symbol_size + 4*frame_parms->nb_prefix_samples + frame_parms->nb_prefix_samples0],
+                               frame_parms->ofdm_symbol_size + frame_parms->nb_prefix_samples);
+-      }	
+-      else n_trials = 1;
+-
+-      if (input_fd == NULL ) {
++                              
++          txlev_sum += atxlev[aa];               
++          
++          if (n_trials==1) printf("txlev[%d] = %d (%f dB) txlev_sum %d\n",aa,atxlev[aa],10*log10((double)atxlev[aa]),txlev_sum);
++        }
++      } 
++      else
++      { 
++        n_trials = 1;
++      }
+ 
+-        sigma_dB = 10 * log10((double)txlev * ((double)frame_parms->ofdm_symbol_size/(12*nb_rb))) - SNR;;
++      if (input_fd == NULL ) 
++      {
++        sigma_dB = 10 * log10((double)txlev_sum * ((double)frame_parms->ofdm_symbol_size/(12*nb_rb))) - SNR;;
+         sigma    = pow(10,sigma_dB/10);
+ 
+ 
+-        if(n_trials==1) printf("sigma %f (%f dB), txlev %f (factor %f)\n",sigma,sigma_dB,10*log10((double)txlev),(double)(double)
++        if(n_trials==1) printf("sigma %f (%f dB), txlev_sum %f (factor %f)\n",sigma,sigma_dB,10*log10((double)txlev_sum),(double)(double)
+                                 frame_parms->ofdm_symbol_size/(12*nb_rb));
+ 
+         for (i=0; i<slot_length; i++) {
+@@ -1230,15 +1265,54 @@ int main(int argc, char **argv)
+         }
+ 
+ 
+-        if (UE2gNB->max_Doppler == 0) {
++        if (UE2gNB->max_Doppler == 0) 
++        {
+           multipath_channel(UE2gNB, s_re, s_im, r_re, r_im, slot_length, 0, (n_trials==1)?1:0);
+         } else {
+           multipath_tv_channel(UE2gNB, s_re, s_im, r_re, r_im, 2*slot_length, 0);
+         }
+-        for (i=0; i<slot_length; i++) {
+-          for (ap=0; ap<frame_parms->nb_antennas_rx; ap++) {
++        for (i=0; i<slot_length; i++) 
++        {
++          for (ap=0; ap<frame_parms->nb_antennas_rx; ap++) 
++          {
++            if (channel_model == AWGN) 
++            {
++              double H_awgn[4][4] ={{1.0, 0.0, 0.0, 0.0},//rx 0
++                                    {0.0, 1.0, 0.0, 0.0},  //rx 1
++                                    {0.0, 0.0, 1.0, 0.0},  //rx 2
++                                    {0.0, 0.0, 0.0, 1.0}};//rx 3
++              #if 0
++              double H_awgn_mimo[4][4] ={{1.0, 0.5, 0.25, 0.125},//rx 0
++                                   {0.5, 1.0, 0.5, 0.25},  //rx 1
++                                   {0.25, 0.5, 1.0, 0.5},  //rx 2
++                                   {0.125, 0.25, 0.5, 1.0}};//rx 3
++              #endif
++              // sum up signals from different Tx antennas
++              r_re[ap][i] = 0;
++              r_im[ap][i] = 0;
++             
++              for (int aa=0; aa<n_tx; aa++) 
++              {
++                r_re[ap][i] += s_re[aa][i]*H_awgn[ap][aa];
++                r_im[ap][i] += s_im[aa][i]*H_awgn[ap][aa];
++              }
++            }
++            
++            sigma_dB = 10 * log10((double)atxlev[ap] * ((double)frame_parms->ofdm_symbol_size/(12*nb_rb))) - SNR;;
++            sigma    = pow(10,sigma_dB/10);
++		
+             ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)   + (delay*2)] = (int16_t)((r_re[ap][i]) + (sqrt(sigma/2)*gaussdouble(0.0,1.0))); // convert to fixed point
+             ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)+1 + (delay*2)] = (int16_t)((r_im[ap][i]) + (sqrt(sigma/2)*gaussdouble(0.0,1.0)));
++            
++            #if 0
++            ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)   + (delay*2)] = (int16_t)((r_re[ap][i])); // convert to fixed point
++            ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)+1 + (delay*2)] = (int16_t)((r_im[ap][i]));
++
++            
++            ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i) ] = ((int16_t*) &UE->common_vars.txdata[ap][slot_offset])[(2*i) ]; // convert to fixed point
++            ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)+1] = ((int16_t*) &UE->common_vars.txdata[ap][slot_offset])[(2*i)+1 ];
++            #endif
++
+             /* Add phase noise if enabled */
+             if (pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
+               phase_noise(ts, &((int16_t*)&gNB->common_vars.rxdata[ap][slot_offset])[(2*i)],
+@@ -1250,7 +1324,8 @@ int main(int argc, char **argv)
+       } /*End input_fd */
+ 
+ 
+-      if(pusch_pdu->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
++      if(pusch_pdu->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) 
++      {
+         set_ptrs_symb_idx(&ptrsSymPos,
+                           pusch_pdu->nr_of_symbols,
+                           pusch_pdu->start_symbol_index,
+@@ -1268,24 +1343,37 @@ int main(int argc, char **argv)
+ 	gNB->UL_INFO.rx_ind.number_of_pdus = 0;
+ 	gNB->UL_INFO.crc_ind.number_crcs = 0;
+ 
+-        phy_procedures_gNB_common_RX(gNB, frame, slot);
++    phy_procedures_gNB_common_RX(gNB, frame, slot);
+ 
+-        ul_proc_error = phy_procedures_gNB_uespec_RX(gNB, frame, slot);
++    ul_proc_error = phy_procedures_gNB_uespec_RX(gNB, frame, slot);
+ 
+-	if (n_trials==1 && round==0) {
++	if (n_trials==1 && round==0) 
++    {
+ 	  LOG_M("rxsig0.m","rx0",&gNB->common_vars.rxdata[0][slot_offset],slot_length,1,1);
+ 
+-	  LOG_M("rxsigF0.m","rxsF0",gNB->common_vars.rxdataF[0]+start_symbol*frame_parms->ofdm_symbol_size,nb_symb_sch*frame_parms->ofdm_symbol_size,1,1);
+-
++	  LOG_M("rxsigF0.m","rxsF0",gNB->common_vars.rxdataF[0],14*frame_parms->ofdm_symbol_size,1,1);
++      if (precod_nbr_layers > 1)
++      {
++          LOG_M("rxsig1.m","rx1",&gNB->common_vars.rxdata[1][slot_offset],slot_length,1,1);
++          LOG_M("rxsigF1.m","rxsF1",gNB->common_vars.rxdataF[1],14*frame_parms->ofdm_symbol_size,1,1);
++          if (precod_nbr_layers==4)
++          {      
++            LOG_M("rxsig2.m","rx2",&gNB->common_vars.rxdata[2][slot_offset],slot_length,1,1);
++            LOG_M("rxsig3.m","rx3",&gNB->common_vars.rxdata[3][slot_offset],slot_length,1,1);
++            
++            LOG_M("rxsigF2.m","rxsF2",gNB->common_vars.rxdataF[2],14*frame_parms->ofdm_symbol_size,1,1);
++            LOG_M("rxsigF3.m","rxsF3",gNB->common_vars.rxdataF[3],14*frame_parms->ofdm_symbol_size,1,1);
++          }
++      }
+ 	}
+ 
+ 
+ 	if (n_trials == 1  && round==0) {
+-#ifdef __AVX2__
++  #ifdef __AVX2__
+ 	  int off = ((nb_rb&1) == 1)? 4:0;
+-#else
++  #else
+ 	  int off = 0;
+-#endif
++  #endif
+ 
+ 	  LOG_M("rxsigF0_ext.m","rxsF0_ext",
+ 		&gNB->pusch_vars[0]->rxdataF_ext[0][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+@@ -1298,11 +1386,77 @@ int main(int argc, char **argv)
+ 		(nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+ 	  LOG_M("rxsigF0_comp.m","rxsF0_comp",
+ 		&gNB->pusch_vars[0]->rxdataF_comp[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+-    LOG_M("chmagF0.m","chmF0",
+-    &gNB->pusch_vars[0]->ul_ch_mag[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+-    LOG_M("chmagbF0.m","chmbF0",
+-    &gNB->pusch_vars[0]->ul_ch_magb[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+-    if (n_rx == 2) {
++      LOG_M("chmagF0.m","chmF0",
++        &gNB->pusch_vars[0]->ul_ch_mag[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++      LOG_M("chmagbF0.m","chmbF0",
++        &gNB->pusch_vars[0]->ul_ch_magb[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++      LOG_M("rxsigF0_llrlayers0.m","rxsF0_llrlayers0",
++            &gNB->pusch_vars[0]->llr_layers[0][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
++    
++      if (precod_nbr_layers==2)
++      {
++         LOG_M("rxsigF1_ext.m","rxsF1_ext",
++            &gNB->pusch_vars[0]->rxdataF_ext[1][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++			  
++        LOG_M("chestF3.m","chF3",
++            &gNB->pusch_vars[0]->ul_ch_estimates[3][start_symbol*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
++        
++        LOG_M("chestF3_ext.m","chF3_ext",
++        &gNB->pusch_vars[0]->ul_ch_estimates_ext[3][(start_symbol+1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],
++              (nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++
++        LOG_M("rxsigF2_comp.m","rxsF2_comp",
++            &gNB->pusch_vars[0]->rxdataF_comp[2][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++        
++        LOG_M("rxsigF0_llrlayers1.m","rxsF0_llrlayers1",
++        &gNB->pusch_vars[0]->llr_layers[1][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
++
++        }
++        
++        if (precod_nbr_layers==4)
++        {
++          LOG_M("rxsigF1_ext.m","rxsF1_ext",
++            &gNB->pusch_vars[0]->rxdataF_ext[1][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++         LOG_M("rxsigF2_ext.m","rxsF2_ext",
++            &gNB->pusch_vars[0]->rxdataF_ext[2][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++        LOG_M("rxsigF3_ext.m","rxsF3_ext",
++            &gNB->pusch_vars[0]->rxdataF_ext[3][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++        
++        LOG_M("chestF5.m","chF5",
++            &gNB->pusch_vars[0]->ul_ch_estimates[5][start_symbol*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
++        LOG_M("chestF10.m","chF10",
++            &gNB->pusch_vars[0]->ul_ch_estimates[10][start_symbol*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
++        LOG_M("chestF15.m","chF15",
++            &gNB->pusch_vars[0]->ul_ch_estimates[15][start_symbol*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
++
++
++        LOG_M("chestF5_ext.m","chF5_ext",
++        &gNB->pusch_vars[0]->ul_ch_estimates_ext[5][(start_symbol+1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],
++              (nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++        LOG_M("chestF10_ext.m","chF10_ext",
++        &gNB->pusch_vars[0]->ul_ch_estimates_ext[10][(start_symbol+1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],
++              (nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++        LOG_M("chestF15_ext.m","chF15_ext",
++        &gNB->pusch_vars[0]->ul_ch_estimates_ext[15][(start_symbol+1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],
++             (nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++
++
++        LOG_M("rxsigF4_comp.m","rxsF4_comp",
++            &gNB->pusch_vars[0]->rxdataF_comp[4][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++        LOG_M("rxsigF8_comp.m","rxsF8_comp",
++            &gNB->pusch_vars[0]->rxdataF_comp[8][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++        LOG_M("rxsigF12_comp.m","rxsF12_comp",
++            &gNB->pusch_vars[0]->rxdataF_comp[12][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
++        LOG_M("rxsigF0_llrlayers1.m","rxsF0_llrlayers1",
++        &gNB->pusch_vars[0]->llr_layers[1][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
++        LOG_M("rxsigF0_llrlayers2.m","rxsF0_llrlayers2",
++        &gNB->pusch_vars[0]->llr_layers[2][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
++        LOG_M("rxsigF0_llrlayers3.m","rxsF0_llrlayers3",
++        &gNB->pusch_vars[0]->llr_layers[3][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
++    }
++		
++    #if 0		
++	if (n_rx == 2) {
+       LOG_MM("rxsigF0_comp.m","rxsF1_comp",
+       &gNB->pusch_vars[0]->rxdataF_comp[1][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+       LOG_MM("rxsigF0_ext.m","rxsF1_ext",
+@@ -1349,9 +1503,10 @@ int main(int argc, char **argv)
+       LOG_MM("chmagbF0.m","chmbF3",
+       &gNB->pusch_vars[0]->ul_ch_magb[3][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+     }
++	#endif
+ 
+ 	  LOG_M("rxsigF0_llr.m","rxsF0_llr",
+-		&gNB->pusch_vars[0]->llr[0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
++		&gNB->pusch_vars[0]->llr[0],precod_nbr_layers*(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
+ 	}
+         ////////////////////////////////////////////////////////////
+ 
+diff --git a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
+index 3f4be9dd58..7e5be9eeba 100644
+--- a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
++++ b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
+@@ -2708,6 +2708,9 @@ uint16_t nr_dci_size(const NR_BWP_DownlinkCommon_t *initialDownlinkBWP,
+       LOG_D(NR_MAC,"dci_pdu->srs_resource_indicator.nbits %d\n",dci_pdu->srs_resource_indicator.nbits);
+       // Precoding info and number of layers
+       long transformPrecoder = get_transformPrecoding(initialUplinkBWP, pusch_Config, ubwpd, (uint8_t*)&format, rnti_type, 0);
++       
++	  pusch_antenna_ports = *pusch_Config->maxRank;
++	   
+       dci_pdu->precoding_information.nbits=0;
+       if (pusch_Config && 
+           pusch_Config->txConfig != NULL){
+diff --git a/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c b/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
+index c23d4bb073..963723725f 100644
+--- a/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
++++ b/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
+@@ -190,7 +190,7 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
+     else
+       transformPrecoder = NR_PUSCH_Config__transformPrecoder_disabled;
+   }
+-
++  pusch_config_pdu->transformPrecoder = transformPrecoder;
+ 
+   /* PRECOD_NBR_LAYERS */
+   if ((*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_nonCodebook));
+@@ -198,11 +198,12 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
+ 
+   if ((*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_codebook)){
+ 
+-    uint8_t n_antenna_port = 0; //FIXME!!!
++    uint8_t n_antenna_port = mac->phy_config.config_req.carrier_config.num_tx_ant;   
+ 
+     if (n_antenna_port == 1); // 1 antenna port and the higher layer parameter txConfig = codebook 0 bits
+ 
+-    if (n_antenna_port == 4){ // 4 antenna port and the higher layer parameter txConfig = codebook
++    if (n_antenna_port == 4)
++    { // 4 antenna port and the higher layer parameter txConfig = codebook
+ 
+       // Table 7.3.1.1.2-2: transformPrecoder=disabled and maxRank = 2 or 3 or 4
+       if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled)
+@@ -212,54 +213,56 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_fullyAndPartialAndNonCoherent) {
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][0];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][1];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][1];
+         }
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_partialAndNonCoherent){
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][2];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][3];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][3];
+         }
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent){
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][4];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][5];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][5];
+         }
+       }
+ 
+       // Table 7.3.1.1.2-3: transformPrecoder= enabled, or transformPrecoder=disabled and maxRank = 1
+       if (((transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled)
+         || (transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled))
+-        && (*pusch_Config->maxRank == 1)){
++        && (*pusch_Config->maxRank == 1))
++      {
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_fullyAndPartialAndNonCoherent) {
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][6];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][7];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][7];
+         }
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_partialAndNonCoherent){
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][8];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][9];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][9];
+         }
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent){
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][10];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][11];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][11];
+         }
+       }
+     }
+ 
+-    if (n_antenna_port == 4){ // 2 antenna port and the higher layer parameter txConfig = codebook
++    if (n_antenna_port == 2)
++    { // 2 antenna port and the higher layer parameter txConfig = codebook
+       // Table 7.3.1.1.2-4: transformPrecoder=disabled and maxRank = 2
+-      if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) && (*pusch_Config->maxRank == 2)){
+-
++      if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) && (*pusch_Config->maxRank == 2))
++      {
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_fullyAndPartialAndNonCoherent) {
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][12];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][13];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][13];
+         }
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent){
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][14];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][15];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][15];
+         }
+ 
+       }
+@@ -267,16 +270,17 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
+       // Table 7.3.1.1.2-5: transformPrecoder= enabled, or transformPrecoder= disabled and maxRank = 1
+       if (((transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled)
+         || (transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled))
+-        && (*pusch_Config->maxRank == 1)){
++        && (*pusch_Config->maxRank == 1))
++      {
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_fullyAndPartialAndNonCoherent) {
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][16];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][17];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][17];
+         }
+ 
+         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent){
+           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][18];
+-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][19];
++          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][19];
+         }
+ 
+       }
+@@ -285,9 +289,9 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
+ 
+   /*-------------------- Changed to enable Transform precoding in RF SIM------------------------------------------------*/
+ 
+- /*if (pusch_config_pdu->transform_precoding == transform_precoder_enabled) {
++ /*if (pusch_config_pdu->transformPrecoder == transformPrecoder_enabled) {
+ 
+-    pusch_config_dedicated->transform_precoder = transform_precoder_enabled;
++    pusch_config_dedicated->transform_precoder = transformPrecoder_enabled;
+ 
+     if(pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA != NULL) {
+ 
+@@ -309,7 +313,7 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
+ 
+     }
+   } else
+-    pusch_config_dedicated->transform_precoder = transform_precoder_disabled;*/
++    pusch_config_dedicated->transformPrecoder = ttransformPrecoder_disabled;*/
+ }
+ 
+ // todo: this function shall be reviewed completely because of the many comments left by the author
+@@ -342,7 +346,7 @@ void ul_ports_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_conf
+   }
+   long *max_length = NULL;
+   long *dmrs_type = NULL;
+-  LOG_D(NR_MAC,"transformPrecoder %s\n",transformPrecoder==NR_PUSCH_Config__transformPrecoder_disabled?"disabled":"enabled");
++  LOG_I(NR_MAC,"transformPrecoder %s\n",transformPrecoder==NR_PUSCH_Config__transformPrecoder_disabled?"disabled":"enabled");
+ 
+   if (pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA) {
+     max_length = pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA->choice.setup->maxLength;
+@@ -353,7 +357,7 @@ void ul_ports_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_conf
+     dmrs_type = pusch_Config->dmrs_UplinkForPUSCH_MappingTypeB->choice.setup->dmrs_Type;
+   }
+ 
+-  LOG_D(NR_MAC,"MappingType%s max_length %s, dmrs_type %s, antenna_ports %d\n",pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA?"A":"B",max_length?"len2":"len1",dmrs_type?"type2":"type1",dci->antenna_ports.val);
++  LOG_I(NR_MAC,"MappingType%s max_length %s, dmrs_type %s, antenna_ports %d\n",pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA?"A":"B",max_length?"len2":"len1",dmrs_type?"type2":"type1",dci->antenna_ports.val);
+   if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled) &&
+     (dmrs_type == NULL) && (max_length == NULL)) { // tables 7.3.1.1.2-6
+       pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2; //TBC
+@@ -369,35 +373,32 @@ void ul_ports_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_conf
+   }
+ 
+   if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) &&
+-    (dmrs_type == NULL) && (max_length == NULL)) { // tables 7.3.1.1.2-8/9/10/11
++    (dmrs_type == NULL) && (max_length == NULL)) 
++  { // tables 7.3.1.1.2-8/9/10/11
++    
++    if ((*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_codebook))
++    {
++      rank = pusch_config_pdu->nrOfLayers;
++    } 
+ 
+     if (rank == 1) {
+-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = (dci->antenna_ports.val > 1)?2:1; //TBC
+-      pusch_config_pdu->dmrs_ports = (dci->antenna_ports.val > 1)?(dci->antenna_ports.val-2):(dci->antenna_ports.val); //TBC
++      pusch_config_pdu->num_dmrs_cdm_grps_no_data = (dci->antenna_ports.val > 1)?2:1;
++      pusch_config_pdu->dmrs_ports =1<<((dci->antenna_ports.val > 1)?(dci->antenna_ports.val-2):(dci->antenna_ports.val));
+     }
+ 
+     if (rank == 2){
+-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = (dci->antenna_ports.val > 0)?2:1; //TBC
+-      pusch_config_pdu->dmrs_ports = 0; //FIXME
+-      //pusch_config_pdu->dmrs_ports[0] = (dci->antenna_ports > 1)?(dci->antenna_ports > 2 ?0:2):0;
+-      //pusch_config_pdu->dmrs_ports[1] = (dci->antenna_ports > 1)?(dci->antenna_ports > 2 ?2:3):1;
++      pusch_config_pdu->num_dmrs_cdm_grps_no_data = (dci->antenna_ports.val > 0)?2:1;
++      pusch_config_pdu->dmrs_ports = (dci->antenna_ports.val > 1)?((dci->antenna_ports.val> 2)?0x5:0xc):0x3;
+     }
+ 
+     if (rank == 3){
+-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2; //TBC
+-      pusch_config_pdu->dmrs_ports = 0; //FIXME
+-      //pusch_config_pdu->dmrs_ports[0] = 0;
+-      //pusch_config_pdu->dmrs_ports[1] = 1;
+-      //pusch_config_pdu->dmrs_ports[2] = 2;
++      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2;
++      pusch_config_pdu->dmrs_ports = ((1<<rank)-1);
+     }
+ 
+     if (rank == 4){
+-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2; //TBC
+-      pusch_config_pdu->dmrs_ports = 0; //FIXME
+-      //pusch_config_pdu->dmrs_ports[0] = 0;
+-      //pusch_config_pdu->dmrs_ports[1] = 1;
+-      //pusch_config_pdu->dmrs_ports[2] = 2;
+-      //pusch_config_pdu->dmrs_ports[3] = 3;
++      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2;
++      pusch_config_pdu->dmrs_ports = ((1<<rank)-1);
+     }
+   }
+ 
+@@ -508,7 +509,7 @@ void ul_ports_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_conf
+       //pusch_config_pdu->n_front_load_symb = table_7_3_1_1_2_23[dci->antenna_ports.val][5]; //FIXME
+     }
+   }
+-  LOG_D(NR_MAC,"num_dmrs_cdm_grps_no_data %d, dmrs_ports %d\n",pusch_config_pdu->num_dmrs_cdm_grps_no_data,pusch_config_pdu->dmrs_ports);
++  LOG_I(NR_MAC,"num_dmrs_cdm_grps_no_data %d, dmrs_ports %d\n",pusch_config_pdu->num_dmrs_cdm_grps_no_data,pusch_config_pdu->dmrs_ports);
+ }
+ 
+ // Configuration of Msg3 PDU according to clauses:
+@@ -542,6 +543,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
+   pusch_config_pdu->dmrs_config_type = pusch_dmrs_type1;
+   pusch_config_pdu->pdu_bit_map      = PUSCH_PDU_BITMAP_PUSCH_DATA;
+   pusch_config_pdu->nrOfLayers       = 1;
++  pusch_config_pdu->Tpmi             = 0;
+   pusch_config_pdu->rnti             = rnti;
+   NR_BWP_UplinkCommon_t *initialUplinkBWP;
+   if (mac->scc) initialUplinkBWP = mac->scc->uplinkConfigCommon->initialUplinkBWP;
+@@ -634,7 +636,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
+     pusch_config_pdu->scid = 0;
+ 
+     // Transform precoding according to 6.1.3 UE procedure for applying transform precoding on PUSCH in 38.214
+-    pusch_config_pdu->transform_precoding = get_transformPrecoding(initialUplinkBWP, pusch_Config, NULL, NULL, NR_RNTI_TC, 0); // TBR fix rnti and take out
++    pusch_config_pdu->transformPrecoder = get_transformPrecoding(initialUplinkBWP, pusch_Config, NULL, NULL, NR_RNTI_TC, 0); // TBR fix rnti and take out
+ 
+     // Resource allocation in frequency domain according to 6.1.2.2 in TS 38.214
+     pusch_config_pdu->resource_alloc = (mac->cg) ? pusch_Config->resourceAllocation : 1;
+@@ -680,7 +682,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
+ 
+     /* Transform precoding */
+     if (rnti_type != NR_RNTI_CS || (rnti_type == NR_RNTI_CS && dci->ndi == 1)) {
+-      pusch_config_pdu->transform_precoding = get_transformPrecoding(initialUplinkBWP, pusch_Config, NULL, dci_format, rnti_type, 0);
++      pusch_config_pdu->transformPrecoder = get_transformPrecoding(initialUplinkBWP, pusch_Config, NULL, dci_format, rnti_type, 0);
+     }
+ 
+     /*DCI format-related configuration*/
+@@ -716,7 +718,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
+ 
+     /* TRANSFORM PRECODING ------------------------------------------------------------------------------------------*/
+ 
+-    if (pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled) {
++    if (pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled) {
+ 
+       pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2;
+ 
+@@ -761,7 +763,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
+     pusch_config_pdu->mcs_index = dci->mcs;
+ 
+     /* MCS TABLE */
+-    if (pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_disabled) {
++    if (pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) {
+       pusch_config_pdu->mcs_table = get_pusch_mcs_table(pusch_Config ? pusch_Config->mcs_Table : NULL, 0, *dci_format, rnti_type, target_ss, false);
+     } else {
+       pusch_config_pdu->mcs_table = get_pusch_mcs_table(pusch_Config ? pusch_Config->mcs_TableTransformPrecoder : NULL, 1, *dci_format, rnti_type, target_ss, false);
+@@ -798,9 +800,14 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
+                                mappingtype, add_pos, dmrslength,
+                                pusch_config_pdu->start_symbol_index,
+                                mac->scc ? mac->scc->dmrs_TypeA_Position : mac->mib->dmrs_TypeA_Position);
+-    if ((mac->ULbwp[0] && pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_disabled))
+-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 1;
+-    else if (*dci_format == NR_UL_DCI_FORMAT_0_0 || (mac->ULbwp[0] && pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled))
++    if (mac->ULbwp[0] && pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled)
++    { 
++	  if (*dci_format != NR_UL_DCI_FORMAT_0_1)
++	  {
++        pusch_config_pdu->num_dmrs_cdm_grps_no_data = 1;
++	  }
++    }
++    else if (*dci_format == NR_UL_DCI_FORMAT_0_0 || (mac->ULbwp[0] && pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled))
+       pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2;
+ 
+     // Num PRB Overhead from PUSCH-ServingCellConfig
+@@ -821,7 +828,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
+ 	mac->ULbwp[0]->bwp_Dedicated->pusch_Config->choice.setup &&
+ 	mac->ULbwp[0]->bwp_Dedicated->pusch_Config->choice.setup->dmrs_UplinkForPUSCH_MappingTypeB &&
+ 	mac->ULbwp[0]->bwp_Dedicated->pusch_Config->choice.setup->dmrs_UplinkForPUSCH_MappingTypeB->choice.setup->phaseTrackingRS) {
+-      if (pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_disabled) {
++      if (pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) {
+         nfapi_nr_ue_ptrs_ports_t ptrs_ports_list;
+         pusch_config_pdu->pusch_ptrs.ptrs_ports_list = &ptrs_ports_list;
+         valid_ptrs_setup = set_ul_ptrs_values(mac->ULbwp[0]->bwp_Dedicated->pusch_Config->choice.setup->dmrs_UplinkForPUSCH_MappingTypeB->choice.setup->phaseTrackingRS->choice.setup,
+@@ -909,6 +916,7 @@ NR_UE_L2_STATE_t nr_ue_scheduler(nr_downlink_indication_t *dl_info, nr_uplink_in
+ 
+       fill_scheduled_response(&scheduled_response, &dcireq.dl_config_req, NULL, NULL, mod_id, cc_id, rx_frame, rx_slot, dl_info->thread_id);
+       if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL)
++        LOG_I(NR_MAC,"1# scheduled_response transmitted, %d, %d\n", rx_frame, rx_slot);
+         mac->if_module->scheduled_response(&scheduled_response);
+     }
+     else {
+@@ -1022,6 +1030,7 @@ NR_UE_L2_STATE_t nr_ue_scheduler(nr_downlink_indication_t *dl_info, nr_uplink_in
+ 
+         fill_scheduled_response(&scheduled_response, NULL, ul_config, &tx_req, mod_id, cc_id, rx_frame, rx_slot, ul_info->thread_id);
+         if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL){
++          LOG_I(NR_MAC,"3# scheduled_response transmitted,%d, %d\n", frame_tx, slot_tx);
+           mac->if_module->scheduled_response(&scheduled_response);
+         }
+         pthread_mutex_lock(&ul_config->mutex_ul_config);
+diff --git a/openair2/LAYER2/NR_MAC_gNB/config.c b/openair2/LAYER2/NR_MAC_gNB/config.c
+index 1cb3fdb708..d9c98852a3 100644
+--- a/openair2/LAYER2/NR_MAC_gNB/config.c
++++ b/openair2/LAYER2/NR_MAC_gNB/config.c
+@@ -388,7 +388,7 @@ void config_common(int Mod_idP, int ssb_SubcarrierOffset, int pdsch_AntennaPorts
+   cfg->carrier_config.num_rx_ant.value = pusch_AntennaPorts;
+   AssertFatal(pusch_AntennaPorts > 0 && pusch_AntennaPorts < 13, "pusch_AntennaPorts in 1...12\n");
+   cfg->carrier_config.num_rx_ant.tl.tag = NFAPI_NR_CONFIG_NUM_RX_ANT_TAG;
+-  LOG_I(NR_MAC,"Set TX/RX antenna number to %d (num ssb %d: %x,%x)\n",cfg->carrier_config.num_tx_ant.value,num_ssb,cfg->ssb_table.ssb_mask_list[0].ssb_mask.value,cfg->ssb_table.ssb_mask_list[1].ssb_mask.value);
++  LOG_I(NR_MAC,"Set RX antenna number to %d, Set TX antenna number to %d (num ssb %d: %x,%x)\n",cfg->carrier_config.num_tx_ant.value,cfg->carrier_config.num_rx_ant.value,num_ssb,cfg->ssb_table.ssb_mask_list[0].ssb_mask.value,cfg->ssb_table.ssb_mask_list[1].ssb_mask.value);
+   AssertFatal(cfg->carrier_config.num_tx_ant.value > 0,"carrier_config.num_tx_ant.value %d !\n",cfg->carrier_config.num_tx_ant.value );
+   cfg->num_tlv++;
+   cfg->num_tlv++;
+diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
+index 6ac7f3f82e..5de36bb2db 100644
+--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
++++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
+@@ -977,9 +977,9 @@ void fill_msg3_pusch_pdu(nfapi_nr_pusch_pdu_t *pusch_pdu,
+   pusch_pdu->target_code_rate = nr_get_code_rate_ul(pusch_pdu->mcs_index,pusch_pdu->mcs_table);
+   pusch_pdu->qam_mod_order = nr_get_Qm_ul(pusch_pdu->mcs_index,pusch_pdu->mcs_table);
+   if (scc->uplinkConfigCommon->initialUplinkBWP->rach_ConfigCommon->choice.setup->msg3_transformPrecoder == NULL)
+-    pusch_pdu->transform_precoding = 1;
++    pusch_pdu->transformPrecoder = 1;
+   else
+-    pusch_pdu->transform_precoding = 0;
++    pusch_pdu->transformPrecoder = 0;
+   pusch_pdu->data_scrambling_id = *scc->physCellId;
+   pusch_pdu->nrOfLayers = 1;
+   pusch_pdu->ul_dmrs_symb_pos = get_l_prime(nr_of_symbols,mappingtype,pusch_dmrs_pos2,pusch_len1,start_symbol_index, scc->dmrs_TypeA_Position);
+diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
+index abc007197d..9cbbe7d9a1 100644
+--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
++++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
+@@ -1139,13 +1139,14 @@ void nr_schedule_ue_spec(module_id_t module_id,
+     dci_payload.dmrs_sequence_initialization.val = pdsch_pdu->SCID;
+     LOG_D(NR_MAC,
+           "%4d.%2d DCI type 1 payload: freq_alloc %d (%d,%d,%d), "
+-          "time_alloc %d, vrb to prb %d, mcs %d tb_scaling %d ndi %d rv %d tpc %d ti %d\n",
++          "nrOfLayers %d, time_alloc %d, vrb to prb %d, mcs %d tb_scaling %d ndi %d rv %d tpc %d ti %d\n",
+           frame,
+           slot,
+           dci_payload.frequency_domain_assignment.val,
+           pdsch_pdu->rbStart,
+           pdsch_pdu->rbSize,
+           pdsch_pdu->BWPSize,
++          pdsch_pdu->nrOfLayers,
+           dci_payload.time_domain_assignment.val,
+           dci_payload.vrb_to_prb_mapping.val,
+           dci_payload.mcs,
+diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
+index f1005cea70..8f7c856a30 100644
+--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
++++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
+@@ -405,6 +405,7 @@ void nr_preprocessor_phytest(module_id_t module_id,
+ 
+ uint32_t target_ul_mcs = 9;
+ uint32_t target_ul_bw = 50;
++uint32_t target_ul_Nl = 1;
+ uint64_t ulsch_slot_bitmap = (1 << 8);
+ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_t slot)
+ {
+@@ -446,15 +447,21 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
+ 
+   const long f = sched_ctrl->search_space->searchSpaceType->choice.ue_Specific->dci_Formats;
+   const int dci_format = f ? NR_UL_DCI_FORMAT_0_1 : NR_UL_DCI_FORMAT_0_0;
+-  const uint8_t num_dmrs_cdm_grps_no_data = 1;
++  uint8_t num_dmrs_cdm_grps_no_data = 1;
++  if ((target_ul_Nl==4)||(target_ul_Nl==3))
++  {
++    num_dmrs_cdm_grps_no_data = 2;
++  }
++  
+   /* we want to avoid a lengthy deduction of DMRS and other parameters in
+    * every TTI if we can save it, so check whether dci_format, TDA, or
+    * num_dmrs_cdm_grps_no_data has changed and only then recompute */
+   NR_pusch_semi_static_t *ps = &sched_ctrl->pusch_semi_static;
+   if (ps->time_domain_allocation != tda
+       || ps->dci_format != dci_format
++      || ps->nrOfLayers != target_ul_Nl
+       || ps->num_dmrs_cdm_grps_no_data != num_dmrs_cdm_grps_no_data)
+-    nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, NULL,dci_format, tda, num_dmrs_cdm_grps_no_data, ps);
++    nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, NULL,dci_format, tda, num_dmrs_cdm_grps_no_data,target_ul_Nl,ps);
+ 
+   uint16_t rbStart = 0;
+   uint16_t rbSize;
+@@ -525,6 +532,7 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
+   sched_pusch->ul_harq_pid = sched_ctrl->retrans_ul_harq.head;
+ 
+   /* Calculate TBS from MCS */
++  ps->nrOfLayers = target_ul_Nl;
+   sched_pusch->R = nr_get_code_rate_ul(mcs, ps->mcs_table);
+   sched_pusch->Qm = nr_get_Qm_ul(mcs, ps->mcs_table);
+   if (ps->pusch_Config->tp_pi2BPSK
+@@ -539,7 +547,7 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
+                                         ps->N_PRB_DMRS * ps->num_dmrs_symb,
+                                         0, // nb_rb_oh
+                                         0,
+-                                        1 /* NrOfLayers */)
++                                        ps->nrOfLayers /* NrOfLayers */)
+                          >> 3;
+ 
+   /* mark the corresponding RBs as used */
+diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
+index 1c0cb0353e..4583da876f 100644
+--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
++++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
+@@ -355,6 +355,7 @@ void nr_set_pusch_semi_static(const NR_ServingCellConfigCommon_t *scc,
+                               long dci_format,
+                               int tda,
+                               uint8_t num_dmrs_cdm_grps_no_data,
++                              uint8_t nrOfLayers,
+                               NR_pusch_semi_static_t *ps)
+ {
+   ps->dci_format = dci_format;
+@@ -371,11 +372,11 @@ void nr_set_pusch_semi_static(const NR_ServingCellConfigCommon_t *scc,
+ 
+   ps->pusch_Config = ubwp?ubwp->bwp_Dedicated->pusch_Config->choice.setup:(ubwpd ? ubwpd->pusch_Config->choice.setup : NULL);
+   if (ps->pusch_Config == NULL || !ps->pusch_Config->transformPrecoder)
+-    ps->transform_precoding = !scc->uplinkConfigCommon->initialUplinkBWP->rach_ConfigCommon->choice.setup->msg3_transformPrecoder;
++    ps->transformPrecoder = !scc->uplinkConfigCommon->initialUplinkBWP->rach_ConfigCommon->choice.setup->msg3_transformPrecoder;
+   else
+-    ps->transform_precoding = *ps->pusch_Config->transformPrecoder;
++    ps->transformPrecoder = *ps->pusch_Config->transformPrecoder;
+   const int target_ss = NR_SearchSpace__searchSpaceType_PR_ue_Specific;
+-  if (ps->transform_precoding)
++  if (ps->transformPrecoder)
+     ps->mcs_table = get_pusch_mcs_table(ps->pusch_Config ? ps->pusch_Config->mcs_Table : NULL,
+                                     0,
+                                     ps->dci_format,
+@@ -392,6 +393,7 @@ void nr_set_pusch_semi_static(const NR_ServingCellConfigCommon_t *scc,
+     num_dmrs_cdm_grps_no_data = 2; // in case of transform precoding - no Data sent in DMRS symbol
+   }
+ 
++  ps->nrOfLayers = nrOfLayers;
+   ps->num_dmrs_cdm_grps_no_data = num_dmrs_cdm_grps_no_data;
+ 
+   /* DMRS calculations */
+@@ -689,8 +691,21 @@ void config_uldci(const NR_BWP_Uplink_t *ubwp,
+                     "Non Codebook configuration non supported\n");
+         dci_pdu_rel15->srs_resource_indicator.val = 0; // taking resource 0 for SRS
+       }
++      AssertFatal((pusch_pdu->Tpmi==0), "unsupport Tpmi\n");
++      dci_pdu_rel15->precoding_information.val= 0;
++      if (pusch_pdu->nrOfLayers == 2)
++      {
++        dci_pdu_rel15->precoding_information.val = 4;
++      }
++      else if (pusch_pdu->nrOfLayers == 4)
++      {
++        dci_pdu_rel15->precoding_information.val = 11;
++      }
++
++      // antenna_ports.val = 0 for transform precoder is disabled, dmrs-Type=1, maxLength=1, Rank=1/2/3/4 
+       // Antenna Ports
+-      dci_pdu_rel15->antenna_ports.val = 0; // TODO for now it is hardcoded, it should depends on cdm group no data and rank
++      dci_pdu_rel15->antenna_ports.val = 0; 
++      
+       // DMRS sequence initialization
+       dci_pdu_rel15->dmrs_sequence_initialization.val = pusch_pdu->scid;
+       break;
+@@ -699,11 +714,14 @@ void config_uldci(const NR_BWP_Uplink_t *ubwp,
+   }
+ 
+   LOG_D(NR_MAC,
+-        "%s() ULDCI type 0 payload: freq_alloc %d, time_alloc %d, freq_hop_flag %d, mcs %d tpc %d ndi %d rv %d\n",
++        "%s() ULDCI type 0 payload: dci_format %d, freq_alloc %d, time_alloc %d, freq_hop_flag %d, precoding_information.val %d antenna_ports.val %d mcs %d tpc %d ndi %d rv %d\n",
+         __func__,
++        dci_format,
+         dci_pdu_rel15->frequency_domain_assignment.val,
+         dci_pdu_rel15->time_domain_assignment.val,
+         dci_pdu_rel15->frequency_hopping_flag.val,
++        dci_pdu_rel15->precoding_information.val,
++        dci_pdu_rel15->antenna_ports.val,
+         dci_pdu_rel15->mcs,
+         dci_pdu_rel15->tpc,
+         dci_pdu_rel15->ndi,
+diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
+index 9a6fa32b5f..269d6c5359 100644
+--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
++++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
+@@ -905,7 +905,7 @@ bool allocate_ul_retransmission(module_id_t module_id,
+   NR_BWP_t *genericParameters = sched_ctrl->active_ubwp ? &sched_ctrl->active_ubwp->bwp_Common->genericParameters : &scc->uplinkConfigCommon->initialUplinkBWP->genericParameters;
+   int rbStart = 0; // wrt BWP start
+   const uint16_t bwpSize = NRRIV2BW(genericParameters->locationAndBandwidth, MAX_BWP_SIZE);
+-
++  const uint8_t nrOfLayers = 1;
+   const uint8_t num_dmrs_cdm_grps_no_data = (sched_ctrl->active_bwp || ubwpd) ? 1 : 2;
+   const int tda = sched_ctrl->active_ubwp ? RC.nrmac[module_id]->preferred_ul_tda[sched_ctrl->active_ubwp->bwp_Id][slot] : 0;
+   LOG_D(NR_MAC,"retInfo->time_domain_allocation = %d, tda = %d\n", retInfo->time_domain_allocation, tda);
+@@ -926,8 +926,9 @@ bool allocate_ul_retransmission(module_id_t module_id,
+ 
+     if (ps->time_domain_allocation != tda
+         || ps->dci_format != dci_format
++        || ps->nrOfLayers != nrOfLayers
+         || ps->num_dmrs_cdm_grps_no_data != num_dmrs_cdm_grps_no_data)
+-      nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, ps);
++      nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, nrOfLayers,ps);
+     LOG_D(NR_MAC, "%s(): retransmission keeping TDA %d and TBS %d\n", __func__, tda, retInfo->tb_size);
+   } else {
+     /* the retransmission will use a different time domain allocation, check
+@@ -939,7 +940,7 @@ bool allocate_ul_retransmission(module_id_t module_id,
+       rbSize++;
+     NR_pusch_semi_static_t temp_ps;
+     int dci_format = get_dci_format(sched_ctrl);
+-    nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp,ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, &temp_ps);
++    nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp,ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, nrOfLayers, &temp_ps);
+     uint32_t new_tbs;
+     uint16_t new_rbSize;
+     bool success = nr_find_nb_rb(retInfo->Qm,
+@@ -1094,7 +1095,7 @@ void pf_ul(module_id_t module_id,
+       if (max_num_ue < 0)
+         return;
+ 
+-      LOG_D(NR_MAC,"Looking for min_rb %d RBs, starting at %d\n", min_rb, rbStart);
++      LOG_D(NR_MAC,"Looking for min_rb %d RBs, starting at %d num_dmrs_cdm_grps_no_data %d\n", min_rb, rbStart, ps->num_dmrs_cdm_grps_no_data);
+       while (rbStart < bwpSize && !rballoc_mask[rbStart]) rbStart++;
+       if (rbStart + min_rb >= bwpSize) {
+         LOG_W(NR_MAC, "cannot allocate continuous UL data for UE %d/RNTI %04x: no resources (rbStart %d, min_rb %d, bwpSize %d\n",
+@@ -1106,13 +1107,15 @@ void pf_ul(module_id_t module_id,
+       /* we want to avoid a lengthy deduction of DMRS and other parameters in
+        * every TTI if we can save it, so check whether dci_format, TDA, or
+        * num_dmrs_cdm_grps_no_data has changed and only then recompute */
++      const uint8_t nrOfLayers = 1;
+       const uint8_t num_dmrs_cdm_grps_no_data = (sched_ctrl->active_ubwp || ubwpd) ? 1 : 2;
+       int dci_format = get_dci_format(sched_ctrl);
+       const int tda = sched_ctrl->active_ubwp ? nrmac->preferred_ul_tda[sched_ctrl->active_ubwp->bwp_Id][slot] : 0;
+       if (ps->time_domain_allocation != tda
+           || ps->dci_format != dci_format
++          || ps->nrOfLayers != nrOfLayers
+           || ps->num_dmrs_cdm_grps_no_data != num_dmrs_cdm_grps_no_data)
+-        nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, ps);
++        nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data,nrOfLayers, ps);
+       NR_sched_pusch_t *sched_pusch = &sched_ctrl->sched_pusch;
+       sched_pusch->mcs = 9;
+       update_ul_ue_R_Qm(sched_pusch, ps);
+@@ -1125,7 +1128,7 @@ void pf_ul(module_id_t module_id,
+                                             ps->N_PRB_DMRS * ps->num_dmrs_symb,
+                                             0, // nb_rb_oh
+                                             0,
+-                                            1 /* NrOfLayers */)
++                                            ps->nrOfLayers)
+                              >> 3;
+ 
+       /* Mark the corresponding RBs as used */
+@@ -1205,13 +1208,15 @@ void pf_ul(module_id_t module_id,
+     /* we want to avoid a lengthy deduction of DMRS and other parameters in
+      * every TTI if we can save it, so check whether dci_format, TDA, or
+      * num_dmrs_cdm_grps_no_data has changed and only then recompute */
++    const uint8_t nrOfLayers = 1;
+     const uint8_t num_dmrs_cdm_grps_no_data = (sched_ctrl->active_ubwp || ubwpd) ? 1 : 2;
+     int dci_format = get_dci_format(sched_ctrl);
+     const int tda = sched_ctrl->active_ubwp ? nrmac->preferred_ul_tda[sched_ctrl->active_ubwp->bwp_Id][slot] : 0;
+     if (ps->time_domain_allocation != tda
+         || ps->dci_format != dci_format
++        || ps->nrOfLayers != nrOfLayers
+         || ps->num_dmrs_cdm_grps_no_data != num_dmrs_cdm_grps_no_data)
+-      nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, ps);
++      nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, nrOfLayers,ps);
+     update_ul_ue_R_Qm(sched_pusch, ps);
+ 
+     /* Calculate the current scheduling bytes and the necessary RBs */
+@@ -1473,7 +1478,7 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
+     sched_ctrl->last_ul_slot = sched_pusch->slot;
+ 
+     LOG_D(NR_MAC,
+-          "ULSCH/PUSCH: %4d.%2d RNTI %04x UL sched %4d.%2d DCI L %d start %2d RBS %3d startSymbol %2d nb_symbol %2d dmrs_pos %x MCS %2d TBS %4d HARQ PID %2d round %d RV %d NDI %d est %6d sched %6d est BSR %6d TPC %d\n",
++          "ULSCH/PUSCH: %4d.%2d RNTI %04x UL sched %4d.%2d DCI L %d start %2d RBS %3d startSymbol %2d nb_symbol %2d dmrs_pos %x MCS %2d nrOfLayers %2d num_dmrs_cdm_grps_no_data %2d TBS %4d HARQ PID %2d round %d RV %d NDI %d est %6d sched %6d est BSR %6d TPC %d\n",
+           frame,
+           slot,
+           rnti,
+@@ -1486,6 +1491,8 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
+           ps->nrOfSymbols,
+           ps->ul_dmrs_symb_pos,
+           sched_pusch->mcs,
++          ps->nrOfLayers,
++          ps->num_dmrs_cdm_grps_no_data,
+           sched_pusch->tb_size,
+           harq_id,
+           cur_harq->round,
+@@ -1532,17 +1539,18 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
+     pusch_pdu->qam_mod_order = sched_pusch->Qm;
+     pusch_pdu->mcs_index = sched_pusch->mcs;
+     pusch_pdu->mcs_table = ps->mcs_table;
+-    pusch_pdu->transform_precoding = ps->transform_precoding;
++    pusch_pdu->transformPrecoder = ps->transformPrecoder;
+     if (ps->pusch_Config && ps->pusch_Config->dataScramblingIdentityPUSCH)
+       pusch_pdu->data_scrambling_id = *ps->pusch_Config->dataScramblingIdentityPUSCH;
+     else
+       pusch_pdu->data_scrambling_id = *scc->physCellId;
+-    pusch_pdu->nrOfLayers = 1;
++    pusch_pdu->nrOfLayers = ps->nrOfLayers;
++    pusch_pdu->num_dmrs_cdm_grps_no_data = ps->num_dmrs_cdm_grps_no_data;
+ 
+     /* FAPI: DMRS */
+     pusch_pdu->ul_dmrs_symb_pos = ps->ul_dmrs_symb_pos;
+     pusch_pdu->dmrs_config_type = ps->dmrs_config_type;
+-    if (pusch_pdu->transform_precoding) { // transform precoding disabled
++    if (pusch_pdu->transformPrecoder) { // transform precoding disabled
+       long *scramblingid=NULL;
+       if (ps->NR_DMRS_UplinkConfig && pusch_pdu->scid == 0)
+         scramblingid = ps->NR_DMRS_UplinkConfig->transformPrecodingDisabled->scramblingID0;
+@@ -1562,7 +1570,7 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
+     }
+     pusch_pdu->scid = 0;      // DMRS sequence initialization [TS38.211, sec 6.4.1.1.1]
+     pusch_pdu->num_dmrs_cdm_grps_no_data = ps->num_dmrs_cdm_grps_no_data;
+-    pusch_pdu->dmrs_ports = 1;
++    pusch_pdu->dmrs_ports = ((1<<ps->nrOfLayers) - 1);
+ 
+     /* FAPI: Pusch Allocation in frequency domain */
+     pusch_pdu->resource_alloc = 1; //type 1
+@@ -1588,7 +1596,7 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
+     LOG_D(NR_MAC,"PUSCH PDU : data_scrambling_identity %x, dmrs_scrambling_id %x\n",pusch_pdu->data_scrambling_id,pusch_pdu->ul_dmrs_scrambling_id);
+     /* TRANSFORM PRECODING --------------------------------------------------------*/
+ 
+-    if (pusch_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled){
++    if (pusch_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled){
+ 
+       // U as specified in section 6.4.1.1.1.2 in 38.211, if sequence hopping and group hopping are disabled
+       pusch_pdu->dfts_ofdm.low_papr_group_number = pusch_pdu->pusch_identity % 30;
+diff --git a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
+index 91221276b0..8a560580c9 100644
+--- a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
++++ b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
+@@ -298,6 +298,7 @@ void nr_set_pusch_semi_static(const NR_ServingCellConfigCommon_t *scc,
+                               long dci_format,
+                               int tda,
+                               uint8_t num_dmrs_cdm_grps_no_data,
++                              uint8_t nrOfLayers,
+                               NR_pusch_semi_static_t *ps);
+ 
+ uint8_t nr_get_tpc(int target, uint8_t cqi, int incr);
+diff --git a/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h b/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
+index 27f42f4b1d..66b10dad42 100644
+--- a/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
++++ b/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
+@@ -318,13 +318,14 @@ typedef struct NR_sched_pucch {
+ typedef struct NR_pusch_semi_static_t {
+   int dci_format;
+   int time_domain_allocation;
++  uint8_t nrOfLayers;
+   uint8_t num_dmrs_cdm_grps_no_data;
+ 
+   int startSymbolIndex;
+   int nrOfSymbols;
+ 
+   NR_PUSCH_Config_t *pusch_Config;
+-  uint8_t transform_precoding;
++  uint8_t transformPrecoder;
+   uint8_t mcs_table;
+ 
+   long mapping_type;
+diff --git a/openair2/RRC/NR/MESSAGES/asn1_msg.c b/openair2/RRC/NR/MESSAGES/asn1_msg.c
+index ff6923429e..52d03b5e56 100755
+--- a/openair2/RRC/NR/MESSAGES/asn1_msg.c
++++ b/openair2/RRC/NR/MESSAGES/asn1_msg.c
+@@ -1867,6 +1867,7 @@ int16_t do_RRCReconfiguration(
+     //                                  1,
+     //                                  1,
+     //                                  carrier->pdsch_AntennaPorts,
++    //                                  carrier->pusch_AntennaPorts,
+     //                                  carrier->initial_csi_index[ue_context_p->local_uid + 1],
+     //                                  ue_context_pP->local_uid);
+ 
+diff --git a/openair2/RRC/NR/nr_rrc_proto.h b/openair2/RRC/NR/nr_rrc_proto.h
+index 9465b9b4b0..6a94450450 100644
+--- a/openair2/RRC/NR/nr_rrc_proto.h
++++ b/openair2/RRC/NR/nr_rrc_proto.h
+@@ -79,6 +79,7 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
+                                      int scg_id,
+                                      int servCellIndex,
+                                      int dl_antenna_ports,
++                                     int ul_antenna_ports,
+                                      int do_csirs,
+                                      int initial_csi_index,
+                                      int uid);
+@@ -90,6 +91,7 @@ void fill_default_reconfig(NR_ServingCellConfigCommon_t *servingcellconfigcommon
+                            NR_RRCReconfiguration_IEs_t *reconfig,
+                            NR_CellGroupConfig_t *secondaryCellGroup,
+                            int dl_antenna_ports,
++                           int ul_antenna_ports,
+                            int do_csirs,
+                            int initial_csi_index,
+                            int uid);
+diff --git a/openair2/RRC/NR/rrc_gNB_nsa.c b/openair2/RRC/NR/rrc_gNB_nsa.c
+index f9fce48f41..22e43cbb6a 100644
+--- a/openair2/RRC/NR/rrc_gNB_nsa.c
++++ b/openair2/RRC/NR/rrc_gNB_nsa.c
+@@ -241,6 +241,7 @@ void rrc_add_nsa_user(gNB_RRC_INST *rrc,struct rrc_gNB_ue_context_s *ue_context_
+                         reconfig_ies,
+                         ue_context_p->ue_context.secondaryCellGroup,
+                         carrier->pdsch_AntennaPorts,
++                        carrier->pusch_AntennaPorts,
+                         carrier->do_CSIRS,
+                         carrier->initial_csi_index[ue_context_p->local_uid + 1],
+                         ue_context_p->local_uid);
+@@ -250,6 +251,7 @@ void rrc_add_nsa_user(gNB_RRC_INST *rrc,struct rrc_gNB_ue_context_s *ue_context_
+                         reconfig_ies,
+                         ue_context_p->ue_context.secondaryCellGroup,
+                         carrier->pdsch_AntennaPorts,
++                        carrier->pusch_AntennaPorts,
+                         carrier->do_CSIRS,
+                         carrier->initial_csi_index[ue_context_p->local_uid + 1],
+                         ue_context_p->local_uid);
+@@ -348,7 +350,7 @@ void rrc_add_nsa_user(gNB_RRC_INST *rrc,struct rrc_gNB_ue_context_s *ue_context_
+     rrc_mac_config_req_gNB(rrc->module_id,
+                            rrc->carrier.ssb_SubcarrierOffset,
+                            rrc->carrier.pdsch_AntennaPorts,
+-			   rrc->carrier.pusch_AntennaPorts,
++                           rrc->carrier.pusch_AntennaPorts,
+                            rrc->carrier.sib1_tda,
+                            rrc->carrier.servingcellconfigcommon,
+                            &rrc->carrier.mib,
+diff --git a/openair2/RRC/NR/rrc_gNB_reconfig.c b/openair2/RRC/NR/rrc_gNB_reconfig.c
+index 980bd39c37..d03f361502 100644
+--- a/openair2/RRC/NR/rrc_gNB_reconfig.c
++++ b/openair2/RRC/NR/rrc_gNB_reconfig.c
+@@ -55,6 +55,7 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
+                                      int scg_id,
+                                      int servCellIndex,
+                                      int dl_antenna_ports,
++                                     int ul_antenna_ports,
+                                      int do_csirs,
+                                      int initial_csi_index,
+                                      int uid) {
+@@ -745,14 +746,14 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
+  pusch_Config->codebookSubset=calloc(1,sizeof(*pusch_Config->codebookSubset));
+  *pusch_Config->codebookSubset = NR_PUSCH_Config__codebookSubset_nonCoherent;
+  pusch_Config->maxRank=calloc(1,sizeof(*pusch_Config->maxRank));
+- *pusch_Config->maxRank= 1;
++ *pusch_Config->maxRank= ul_antenna_ports;
+  pusch_Config->rbg_Size=NULL;
+  pusch_Config->uci_OnPUSCH=NULL;
+  pusch_Config->tp_pi2BPSK=NULL;
+ 
+  /*------------------------------TRANSFORM PRECODING- -----------------------------------------------------------------------*/
+ 
+- uint8_t transform_precoding = NR_PUSCH_Config__transformPrecoder_disabled;
++ uint8_t transformPrecoder = NR_PUSCH_Config__transformPrecoder_disabled;
+ 
+  // TBD: configure this from .conf file, Dedicated params cannot yet be configured in .conf file.
+  // Enable this to test transform precoding enabled from dedicated config.
+@@ -764,13 +765,13 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
+ 
+  if (pusch_Config->transformPrecoder == NULL) {
+   if (servingcellconfigcommon->uplinkConfigCommon->initialUplinkBWP->rach_ConfigCommon->choice.setup->msg3_transformPrecoder != NULL)
+-    transform_precoding = NR_PUSCH_Config__transformPrecoder_enabled;
++    transformPrecoder = NR_PUSCH_Config__transformPrecoder_enabled;
+  }
+  else
+-    transform_precoding = *pusch_Config->transformPrecoder;
++    transformPrecoder = *pusch_Config->transformPrecoder;
+ 
+ 
+- if (transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled ) {
++ if (transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled ) {
+     /* Enable DMRS uplink config for transform precoding enabled */
+     NR_DMRS_UplinkConfig->transformPrecodingEnabled = calloc(1,sizeof(*NR_DMRS_UplinkConfig->transformPrecodingEnabled));
+     NR_DMRS_UplinkConfig->transformPrecodingEnabled->nPUSCH_Identity = NULL;
+@@ -1348,6 +1349,7 @@ void fill_default_reconfig(NR_ServingCellConfigCommon_t *servingcellconfigcommon
+                            NR_RRCReconfiguration_IEs_t *reconfig,
+                            NR_CellGroupConfig_t *secondaryCellGroup,
+                            int dl_antenna_ports,
++                           int ul_antenna_ports,
+                            int do_csirs,
+                            int initial_csi_index,
+                            int uid) {
+@@ -1363,6 +1365,7 @@ void fill_default_reconfig(NR_ServingCellConfigCommon_t *servingcellconfigcommon
+                                   1,
+                                   1,
+                                   dl_antenna_ports,
++                                  ul_antenna_ports,
+                                   do_csirs,
+                                   initial_csi_index,
+                                   uid);
+diff --git a/targets/ARCH/rfsimulator/simulator.c b/targets/ARCH/rfsimulator/simulator.c
+index 13945998d7..20faa86591 100644
+--- a/targets/ARCH/rfsimulator/simulator.c
++++ b/targets/ARCH/rfsimulator/simulator.c
+@@ -429,7 +429,7 @@ static int rfsimulator_write_internal(rfsimulator_state_t *t, openair0_timestamp
+   if (!alreadyLocked)
+     pthread_mutex_lock(&Sockmutex);
+ 
+-  LOG_D(HW,"sending %d samples at time: %ld\n", nsamps, timestamp);
++  LOG_D(HW,"sending %d samples at time: %ld, nbAnt %d\n", nsamps, timestamp, nbAnt);
+ 
+   for (int i=0; i<FD_SETSIZE; i++) {
+     buffer_t *b=&t->buf[i];
+@@ -623,7 +623,7 @@ static int rfsimulator_read(openair0_device *device, openair0_timestamp *ptimest
+   }
+ 
+   rfsimulator_state_t *t = device->priv;
+-  LOG_D(HW, "Enter rfsimulator_read, expect %d samples, will release at TS: %ld\n", nsamps, t->nextTimestamp+nsamps);
++  LOG_D(HW, "Enter rfsimulator_read, expect %d samples, will release at TS: %ld, nbAnt %d\n", nsamps, t->nextTimestamp+nsamps, nbAnt);
+   // deliver data from received data
+   // check if a UE is connected
+   int first_sock;
+@@ -728,6 +728,7 @@ static int rfsimulator_read(openair0_device *device, openair0_timestamp *ptimest
+ 
+       for (int a=0; a<nbAnt; a++) {//loop over number of Rx antennas
+         if ( ptr->channel_model != NULL ) // apply a channel model
++        {
+           rxAddInput( ptr->circularBuf, (struct complex16 *) samplesVoid[a],
+                       a,
+                       ptr->channel_model,
+@@ -735,11 +736,21 @@ static int rfsimulator_read(openair0_device *device, openair0_timestamp *ptimest
+                       t->nextTimestamp,
+                       CirSize
+                     );
+-        else { // no channel modeling
++        }
++        else 
++        { // no channel modeling
++          
++          #if 0
+           double H_awgn_mimo[4][4] ={{1.0, 0.5, 0.25, 0.125},//rx 0
+                                      {0.5, 1.0, 0.5, 0.25},  //rx 1
+                                      {0.25, 0.5, 1.0, 0.5},  //rx 2
+                                      {0.125, 0.25, 0.5, 1.0}};//rx 3
++          #else
++          double H_awgn_mimo[4][4] ={{1.0, 0.0, 0.0, 0.0},//rx 0
++                                     {0.0, 1.0, 0.0, 0.0},//rx 1
++                                     {0.0, 0.0, 1.0, 0.0},//rx 2
++                                     {0.0, 0.0, 0.0, 1.0}};//rx 3
++          #endif
+ 
+           sample_t *out=(sample_t *)samplesVoid[a];
+           int nbAnt_tx = ptr->th.nbAnt;//number of Tx antennas
diff --git a/executables/nr-softmodem-common.h b/executables/nr-softmodem-common.h
index c02155326b6..1238b43c276 100644
--- a/executables/nr-softmodem-common.h
+++ b/executables/nr-softmodem-common.h
@@ -67,6 +67,7 @@
 #define CONFIG_HLP_ITTIL         "Generate ITTI analyzser logs (similar to wireshark logs but with more details)\n"
 #define CONFIG_HLP_DLMCS_PHYTEST "Set the downlink MCS for PHYTEST mode\n"
 #define CONFIG_HLP_DLNL_PHYTEST "Set the downlink nrOfLayers for PHYTEST mode\n"
+#define CONFIG_HLP_ULNL_PHYTEST "Set the uplink nrOfLayers for PHYTEST mode\n"
 #define CONFIG_HLP_STMON         "Enable processing timing measurement of lte softmodem on per subframe basis \n"
 #define CONFIG_HLP_MSLOTS        "Skip the missed slots/subframes \n"
 #define CONFIG_HLP_ULMCS_PHYTEST "Set the uplink MCS for PHYTEST mode\n"
diff --git a/executables/nr-softmodem.h b/executables/nr-softmodem.h
index 896c363cbf1..a11572c0e15 100644
--- a/executables/nr-softmodem.h
+++ b/executables/nr-softmodem.h
@@ -23,6 +23,7 @@
     {"E" ,                    CONFIG_HLP_TQFS,        PARAMFLAG_BOOL,   i8ptr:&threequarter_fs,             defintval:0,                   TYPE_INT8,   0},        \
     {"m" ,                    CONFIG_HLP_DLMCS_PHYTEST,0,               uptr:&target_dl_mcs,                defintval:0,                   TYPE_UINT,   0},        \
     {"l" ,                    CONFIG_HLP_DLNL_PHYTEST,0,                uptr:&target_dl_Nl,                 defintval:0,                   TYPE_UINT,   0},        \
+    {"L" ,                    CONFIG_HLP_ULNL_PHYTEST,0,                uptr:&target_ul_Nl,                 defintval:0,                   TYPE_UINT,   0},        \
     {"t" ,                    CONFIG_HLP_ULMCS_PHYTEST,0,               uptr:&target_ul_mcs,                defintval:0,                   TYPE_UINT,   0},        \
     {"M" ,                    CONFIG_HLP_DLBW_PHYTEST,0,                uptr:&target_dl_bw,                 defintval:0,                   TYPE_UINT,   0},        \
     {"T" ,                    CONFIG_HLP_ULBW_PHYTEST,0,                uptr:&target_ul_bw,                 defintval:0,                   TYPE_UINT,   0},        \
@@ -36,6 +37,7 @@
 extern threads_t threads;
 extern uint32_t target_dl_mcs;
 extern uint32_t target_dl_Nl;
+extern uint32_t target_ul_Nl;
 extern uint32_t target_ul_mcs;
 extern uint32_t target_dl_bw;
 extern uint32_t target_ul_bw;
diff --git a/executables/nr-ue.c b/executables/nr-ue.c
index 60ff40abeb1..a23f91b1dd8 100644
--- a/executables/nr-ue.c
+++ b/executables/nr-ue.c
@@ -278,7 +278,7 @@ void processSlotTX(void *arg) {
   int tx_slot_type = nr_ue_slot_select(cfg, proc->frame_tx, proc->nr_slot_tx);
   uint8_t gNB_id = 0;
 
-  LOG_D(PHY,"%d.%d => slot type %d\n",proc->frame_tx,proc->nr_slot_tx,tx_slot_type);
+  LOG_D(PHY,"processSlotTX %d.%d => slot type %d\n",proc->frame_tx,proc->nr_slot_tx,tx_slot_type);
   if (tx_slot_type == NR_UPLINK_SLOT || tx_slot_type == NR_MIXED_SLOT){
 
     // trigger L2 to run ue_scheduler thru IF module
@@ -372,7 +372,8 @@ void processSlotRX(void *arg) {
       LOG_D(PHY, "Sending Uplink data \n");
       nr_ue_pusch_common_procedures(UE,
                                     proc->nr_slot_tx,
-                                    &UE->frame_parms,1);
+                                    &UE->frame_parms,
+                                    UE->frame_parms.nb_antennas_tx);
     }
 
     if (UE->UE_mode[gNB_id] > NOT_SYNCHED && UE->UE_mode[gNB_id] < PUSCH) {
diff --git a/executables/nr-uesoftmodem.c b/executables/nr-uesoftmodem.c
index ad5ea982c94..57fd2846013 100644
--- a/executables/nr-uesoftmodem.c
+++ b/executables/nr-uesoftmodem.c
@@ -465,8 +465,11 @@ int main( int argc, char **argv ) {
     memset(UE[CC_id],0,sizeof(PHY_VARS_NR_UE));
 
     set_options(CC_id, UE[CC_id]);
-    NR_UE_MAC_INST_t *mac = get_mac_inst(0);
-
+    NR_DL_FRAME_PARMS *fp = &UE[CC_id]->frame_parms;
+    NR_UE_MAC_INST_t *mac = get_mac_inst(0);  
+    
+	mac->phy_config.config_req.carrier_config.num_tx_ant = fp->nb_antennas_tx;
+	
     if (get_softmodem_params()->sa) {
       uint16_t nr_band = get_band(downlink_frequency[CC_id][0],uplink_frequency_offset[CC_id][0]);
       mac->nr_band = nr_band;
diff --git a/nfapi/open-nFAPI/nfapi/public_inc/fapi_nr_ue_interface.h b/nfapi/open-nFAPI/nfapi/public_inc/fapi_nr_ue_interface.h
index 0e923931ad4..8b93417cb0c 100644
--- a/nfapi/open-nFAPI/nfapi/public_inc/fapi_nr_ue_interface.h
+++ b/nfapi/open-nFAPI/nfapi/public_inc/fapi_nr_ue_interface.h
@@ -298,9 +298,10 @@ typedef struct
   uint8_t  qam_mod_order;
   uint8_t  mcs_index;
   uint8_t  mcs_table;
-  uint8_t  transform_precoding;
+  uint8_t  transformPrecoder;
   uint16_t data_scrambling_id;
   uint8_t  nrOfLayers;
+  uint8_t  Tpmi;
   //DMRS
   uint16_t  ul_dmrs_symb_pos;
   uint8_t  dmrs_config_type;
diff --git a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
index 9d245446c86..58ff9037cef 100644
--- a/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
+++ b/nfapi/open-nFAPI/nfapi/public_inc/nfapi_nr_interface_scf.h
@@ -1208,9 +1208,10 @@ typedef struct
   uint8_t  qam_mod_order;
   uint8_t  mcs_index;
   uint8_t  mcs_table;
-  uint8_t  transform_precoding;
+  uint8_t  transformPrecoder;
   uint16_t data_scrambling_id;
   uint8_t  nrOfLayers;
+  uint8_t  Tpmi;
   //DMRS
   uint16_t  ul_dmrs_symb_pos;
   uint8_t  dmrs_config_type;
diff --git a/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c b/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
index 7cb74f8562c..473b0900bcf 100644
--- a/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
+++ b/nfapi/open-nFAPI/nfapi/src/nfapi_p7.c
@@ -941,9 +941,10 @@ static uint8_t pack_ul_tti_request_pusch_pdu(nfapi_nr_pusch_pdu_t *pusch_pdu, ui
         push8(pusch_pdu->qam_mod_order, ppWritePackedMsg, end) &&
         push8(pusch_pdu->mcs_index, ppWritePackedMsg, end) &&
         push8(pusch_pdu->mcs_table, ppWritePackedMsg, end) &&
-        push8(pusch_pdu->transform_precoding, ppWritePackedMsg, end) &&
+        push8(pusch_pdu->transformPrecoder, ppWritePackedMsg, end) &&
         push16(pusch_pdu->data_scrambling_id, ppWritePackedMsg, end) &&
         push8(pusch_pdu->nrOfLayers, ppWritePackedMsg, end) &&
+        push8(pusch_pdu->Tpmi, ppWritePackedMsg, end) &&
         push16(pusch_pdu->ul_dmrs_symb_pos, ppWritePackedMsg, end) &&
         push8(pusch_pdu->dmrs_config_type, ppWritePackedMsg, end) &&
         push16(pusch_pdu->ul_dmrs_scrambling_id, ppWritePackedMsg, end) &&
@@ -4445,9 +4446,10 @@ static uint8_t unpack_ul_tti_request_pusch_pdu(void *tlv, uint8_t **ppReadPacked
         pull8(ppReadPackedMsg, &pusch_pdu->qam_mod_order,  end) &&
         pull8(ppReadPackedMsg, &pusch_pdu->mcs_index,  end) &&
         pull8(ppReadPackedMsg, &pusch_pdu->mcs_table, end) &&
-        pull8(ppReadPackedMsg, &pusch_pdu->transform_precoding, end) &&
+        pull8(ppReadPackedMsg, &pusch_pdu->transformPrecoder, end) &&
         pull16(ppReadPackedMsg, &pusch_pdu->data_scrambling_id, end) &&
         pull8(ppReadPackedMsg, &pusch_pdu->nrOfLayers, end) &&
+        pull8(ppReadPackedMsg, &pusch_pdu->Tpmi, end) &&
         pull16(ppReadPackedMsg, &pusch_pdu->ul_dmrs_symb_pos, end) &&
         pull8(ppReadPackedMsg, &pusch_pdu->dmrs_config_type, end) &&
         pull16(ppReadPackedMsg, &pusch_pdu->ul_dmrs_scrambling_id, end) &&
diff --git a/openair1/PHY/INIT/nr_init.c b/openair1/PHY/INIT/nr_init.c
index cd4848ea8ce..2ac32302156 100644
--- a/openair1/PHY/INIT/nr_init.c
+++ b/openair1/PHY/INIT/nr_init.c
@@ -276,11 +276,21 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB,
     pusch_vars[ULSCH_id]->ul_ch_magb0           = (int32_t **)malloc16(n_buf*sizeof(int32_t *) );
     pusch_vars[ULSCH_id]->ul_ch_mag             = (int32_t **)malloc16(n_buf*sizeof(int32_t *) );
     pusch_vars[ULSCH_id]->ul_ch_magb            = (int32_t **)malloc16(n_buf*sizeof(int32_t *) );
-    pusch_vars[ULSCH_id]->rho                   = (int32_t **)malloc16_clear(n_buf*sizeof(int32_t*) );
+    pusch_vars[ULSCH_id]->rho                   = (int32_t ***)malloc16_clear(Prx*sizeof(int32_t**) );
+    pusch_vars[ULSCH_id]->llr_layers            = (int16_t **)malloc16(max_ul_mimo_layers*sizeof(int32_t *) );
 
     for (i=0; i<Prx; i++) {
       pusch_vars[ULSCH_id]->rxdataF_ext[i]           = (int32_t *)malloc16_clear( sizeof(int32_t)*N_RB_UL*12*fp->symbols_per_slot );
       pusch_vars[ULSCH_id]->rxdataF_ext2[i]          = (int32_t *)malloc16_clear( sizeof(int32_t)*N_RB_UL*12*fp->symbols_per_slot );
+      pusch_vars[ULSCH_id]->rho[i]                   = (int32_t **)malloc16_clear( NR_MAX_NB_LAYERS*NR_MAX_NB_LAYERS*sizeof(int32_t*));
+
+      for (int j=0; j< max_ul_mimo_layers; j++) 
+      {
+        for (int k=0; k<max_ul_mimo_layers; k++) 
+        {
+          pusch_vars[ULSCH_id]->rho[i][j*max_ul_mimo_layers+k]=(int32_t *)malloc16_clear( sizeof(int32_t) * fp->N_RB_UL*12*7*2 );
+        }
+      }
     }
     for (i=0; i<n_buf; i++) {
       pusch_vars[ULSCH_id]->ul_ch_estimates[i]       = (int32_t *)malloc16_clear( sizeof(int32_t)*fp->ofdm_symbol_size*2*fp->symbols_per_slot );
@@ -293,8 +303,12 @@ int phy_init_nr_gNB(PHY_VARS_gNB *gNB,
       pusch_vars[ULSCH_id]->ul_ch_mag0[i]            = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );
       pusch_vars[ULSCH_id]->ul_ch_magb0[i]           = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );
       pusch_vars[ULSCH_id]->ul_ch_mag[i]             = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );
-      pusch_vars[ULSCH_id]->ul_ch_magb[i]            = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );
-      pusch_vars[ULSCH_id]->rho[i]                   = (int32_t *)malloc16_clear( sizeof(int32_t)*(fp->N_RB_UL*12*7*2) );
+      pusch_vars[ULSCH_id]->ul_ch_magb[i]            = (int32_t *)malloc16_clear( fp->symbols_per_slot*sizeof(int32_t)*N_RB_UL*12 );      
+    }
+
+    for (i=0; i< max_ul_mimo_layers; i++) 
+    {
+      pusch_vars[ULSCH_id]->llr_layers[i] = (int16_t *)malloc16_clear( (8*((3*8*6144)+12))*sizeof(int16_t) ); // [hna] 6144 is LTE and (8*((3*8*6144)+12)) is not clear
     }
     pusch_vars[ULSCH_id]->llr = (int16_t *)malloc16_clear( (8*((3*8*6144)+12))*sizeof(int16_t) ); // [hna] 6144 is LTE and (8*((3*8*6144)+12)) is not clear
     pusch_vars[ULSCH_id]->ul_valid_re_per_slot  = (int16_t *)malloc16_clear( sizeof(int16_t)*fp->symbols_per_slot);
diff --git a/openair1/PHY/INIT/nr_init_ru.c b/openair1/PHY/INIT/nr_init_ru.c
index 4024ed16172..169db48e699 100644
--- a/openair1/PHY/INIT/nr_init_ru.c
+++ b/openair1/PHY/INIT/nr_init_ru.c
@@ -41,7 +41,7 @@ int nr_phy_init_RU(RU_t *ru) {
   int p;
   int re;
 
-  LOG_I(PHY,"Initializing RU signal buffers (if_south %s) nb_tx %d\n",ru_if_types[ru->if_south],ru->nb_tx);
+  LOG_I(PHY,"Initializing RU signal buffers (if_south %s) nb_tx %d, nb_rx %d\n",ru_if_types[ru->if_south],ru->nb_tx, ru->nb_rx);
 
   nfapi_nr_config_request_scf_t *cfg;
   ru->nb_log_antennas=0;
diff --git a/openair1/PHY/INIT/nr_init_ue.c b/openair1/PHY/INIT/nr_init_ue.c
index b60e77e814b..11fb6e7d613 100644
--- a/openair1/PHY/INIT/nr_init_ue.c
+++ b/openair1/PHY/INIT/nr_init_ue.c
@@ -124,7 +124,8 @@ void phy_init_nr_ue_PUSCH(NR_UE_PUSCH *const pusch,
   AssertFatal( pusch, "pusch==0" );
 
   for (int i=0; i<NR_MAX_NB_LAYERS; i++) {
-    pusch->txdataF_layers[i] = (int32_t *)malloc16_clear(NR_MAX_PUSCH_ENCODED_LENGTH*sizeof(int32_t));
+    pusch->txdataF_layers[i] = (int32_t *)malloc16_clear((NR_MAX_PUSCH_ENCODED_LENGTH)*sizeof(int32_t *));
+    pusch->txdataF_precoding[i] = (int32_t *)malloc16_clear((NR_MAX_PUSCH_ENCODED_LENGTH)*sizeof(int32_t *));
   }
 }
 
@@ -143,7 +144,7 @@ int init_nr_ue_signal(PHY_VARS_NR_UE *ue,
   uint16_t N_n_scid[2] = {0,1}; // [HOTFIX] This is a temporary implementation of scramblingID0 and scramblingID1 which are given by DMRS-UplinkConfig
   int n_scid;
   abstraction_flag = 0;
-  LOG_I(PHY, "Initializing UE vars (abstraction %u) for gNB TXant %u, UE RXant %u\n", abstraction_flag, fp->nb_antennas_tx, fp->nb_antennas_rx);
+  LOG_I(PHY, "Initializing UE vars (abstraction %u) for UE TXant %u, UE RXant %u\n", abstraction_flag, fp->nb_antennas_tx, fp->nb_antennas_rx);
   //LOG_D(PHY,"[MSC_NEW][FRAME 00000][PHY_UE][MOD %02u][]\n", ue->Mod_id+NB_gNB_INST);
   phy_init_nr_top(ue);
   // many memory allocation sizes are hard coded
diff --git a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
index 1f164748bf5..9a432e86158 100644
--- a/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
+++ b/openair1/PHY/NR_ESTIMATION/nr_ul_channel_estimation.c
@@ -110,7 +110,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
          fr = filt8_r1;
          fmm = filt8_mm1;
          fml = filt8_ml1;
-         fmr = filt8_m1;
+         fmr = filt8_mm1;
          fdcl = filt8_dcl1;
          fdcr = filt8_dcr1;
          fdclh = filt8_dcl1_h;
@@ -129,9 +129,8 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
 
   //------------------generate DMRS------------------//
 
-  // transform precoding = 1 means disabled
-  if (pusch_pdu->transform_precoding == 1) {
-    nr_pusch_dmrs_rx(gNB, Ns, gNB->nr_gold_pusch_dmrs[pusch_pdu->scid][Ns][symbol], &pilot[0], 1000, 0, nb_rb_pusch,
+  if (pusch_pdu->transformPrecoder == transformPrecoder_disabled) {
+    nr_pusch_dmrs_rx(gNB, Ns, gNB->nr_gold_pusch_dmrs[pusch_pdu->scid][Ns][symbol], &pilot[0], (1000+p), 0, nb_rb_pusch,
                      (pusch_pdu->bwp_start + pusch_pdu->rb_start)*NR_NB_SC_PER_RB, pusch_pdu->dmrs_config_type);
   }
   else {  // if transform precoding or SC-FDMA is enabled in Uplink
@@ -291,7 +290,7 @@ int nr_pusch_channel_estimation(PHY_VARS_gNB *gNB,
         printf("pilot %u : rxF - > (%d,%d) (%d) ch -> (%d,%d) (%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],dBc(rxF[0],rxF[1]),ch[0],ch[1],dBc(ch[0],ch[1]),pil[0],pil[1]);
 	printf("data %u : rxF - > (%d,%d) (%d)\n",pilot_cnt,rxF[2],rxF[3],dBc(rxF[2],rxF[3]));
   #endif
-        multadd_real_vector_complex_scalar(fml,
+        multadd_real_vector_complex_scalar(fm,
                                            ch,
                                            ul_ch,
                                            8);
diff --git a/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h b/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
index 4fed3a468fe..87e4d573629 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
+++ b/openair1/PHY/NR_TRANSPORT/nr_transport_proto.h
@@ -137,21 +137,22 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
     @param nb_rb_pusch The number of RBs allocated (used for Resource Allocation Type 1 in NR)
     @param frame_parms, Pointer to frame descriptor structure
 */
-void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
-                                 NR_gNB_PUSCH *pusch_vars,
-                                 int slot,
-                                 unsigned char symbol,
-                                 uint8_t is_dmrs_symbol,
-                                 nfapi_nr_pusch_pdu_t *pusch_pdu,
-                                 NR_DL_FRAME_PARMS *frame_parms);
+void nr_ulsch_extract_rbs(int32_t **rxdataF,
+                          NR_gNB_PUSCH *pusch_vars,
+                          int slot,
+                          unsigned char symbol,
+                          uint8_t is_dmrs_symbol,
+                          nfapi_nr_pusch_pdu_t *pusch_pdu,
+                          NR_DL_FRAME_PARMS *frame_parms);
 
 void nr_ulsch_scale_channel(int32_t **ul_ch_estimates_ext,
                             NR_DL_FRAME_PARMS *frame_parms,
                             NR_gNB_ULSCH_t **ulsch_gNB,
-                            uint8_t symbol,
-                            uint8_t start_symbol,
-                            uint16_t nb_rb,
-                            pusch_dmrs_type_t pusch_dmrs_type);
+                            uint8_t symbol, 
+                            uint8_t is_dmrs_symbol,                           
+                            uint32_t len,
+                            uint8_t nrOfLayers,
+                            uint16_t nb_rb);
 
 /** \brief This function computes the average channel level over all allocated RBs and antennas (TX/RX) in order to compute output shift for compensated signal
     @param ul_ch_estimates_ext Channel estimates in allocated RBs
@@ -185,9 +186,10 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
                                 int **ul_ch_mag,
                                 int **ul_ch_magb,
                                 int **rxdataF_comp,
-                                int **rho,
+                                int ***rho,
                                 NR_DL_FRAME_PARMS *frame_parms,
                                 unsigned char symbol,
+                                int length,
                                 uint8_t is_dmrs_symbol,
                                 unsigned char mod_order,
                                 uint8_t  nrOfLayers,
@@ -255,8 +257,8 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
     @param mod_order modulation order
 */
 void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
-                          int32_t **ul_ch_mag,
-                          int32_t **ul_ch_magb,
+                          int32_t *ul_ch_mag,
+                          int32_t *ul_ch_magb,
                           int16_t  *ulsch_llr,
                           uint32_t nb_rb,
                           uint32_t nb_re,
diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch.c
index 4278338dbcc..3ea19a9970f 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch.c
@@ -137,6 +137,38 @@ void nr_ulsch_unscrambling_optim(int16_t* llr,
 #endif
 }
 
+void nr_ulsch_layer_demapping(int16_t *llr_cw,
+				     uint8_t Nl,
+				     uint8_t mod_order,
+				     uint32_t length,
+				     int16_t **llr_layers) 
+{
+
+  switch (Nl) {
+
+    case 1:
+      memcpy((void*)llr_cw, (void*)llr_layers[0], (length)*sizeof(int16_t));
+    break;
+
+    case 2:
+    case 3:
+    case 4:
+      for (int i=0; i<(length/Nl/mod_order); i++)
+      {
+        for (int l=0; l<Nl; l++) 
+        {
+        	for (int m=0; m<mod_order; m++){
+        		llr_cw[i*Nl*mod_order+l*mod_order+m] = llr_layers[l][i*mod_order+m];
+        	}
+        }
+  	  }
+    break;
+
+  default:
+  AssertFatal(0, "Not supported number of layers %d\n", Nl);
+  }
+}
+
 void dump_pusch_stats(FILE *fd,PHY_VARS_gNB *gNB) {
 
   for (int i=0;i<gNB->number_of_nr_ulsch_max;i++) {
diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch.h b/openair1/PHY/NR_TRANSPORT/nr_ulsch.h
index 80c10d74b15..0e660351aef 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch.h
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch.h
@@ -82,6 +82,12 @@ void nr_ulsch_unscrambling_optim(int16_t* llr,
 				 uint32_t Nid,
 				 uint32_t n_RNTI);
 
+void nr_ulsch_layer_demapping(int16_t *llr_cw,
+				     uint8_t Nl,
+				     uint8_t mod_order,
+				     uint32_t length,
+				     int16_t **llr_layers); 
+
 void nr_ulsch_procedures(PHY_VARS_gNB *gNB,
                          int frame_rx,
                          int slot_rx,
diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
index 28dd5ab2192..8314c83ad98 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_demodulation.c
@@ -302,26 +302,27 @@ void nr_idft(int32_t *z, uint32_t Msc_PUSCH)
 }
 
 
-void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
-                                 NR_gNB_PUSCH *pusch_vars,
-                                 int slot,
-                                 unsigned char symbol,
-                                 uint8_t is_dmrs_symbol,
-                                 nfapi_nr_pusch_pdu_t *pusch_pdu,
-                                 NR_DL_FRAME_PARMS *frame_parms)
+void nr_ulsch_extract_rbs(int32_t **rxdataF,
+                          NR_gNB_PUSCH *pusch_vars,
+                          int slot,
+                          unsigned char symbol,
+                          uint8_t is_dmrs_symbol,
+                          nfapi_nr_pusch_pdu_t *pusch_pdu,
+                          NR_DL_FRAME_PARMS *frame_parms)
 {
 
   unsigned short start_re, re, nb_re_pusch;
-  unsigned char aarx;
+  unsigned char aarx, aatx;
   uint32_t rxF_ext_index = 0;
   uint32_t ul_ch0_ext_index = 0;
   uint32_t ul_ch0_index = 0;
-  uint8_t k_prime;
-  uint16_t n;
+  //uint8_t k_prime;
+  //uint16_t n;
   int16_t *rxF,*rxF_ext;
   int *ul_ch0,*ul_ch0_ext;
-  uint8_t delta = 0;
+  //uint8_t delta = 0;
   int soffset = (slot&3)*frame_parms->symbols_per_slot*frame_parms->ofdm_symbol_size;
+
 #ifdef DEBUG_RB_EXT
 
   printf("--------------------symbol = %d-----------------------\n", symbol);
@@ -329,7 +330,7 @@ void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
 
 #endif
 
-  uint8_t is_dmrs_re;
+  uint8_t is_data_re;
   start_re = (frame_parms->first_carrier_offset + (pusch_pdu->rb_start + pusch_pdu->bwp_start) * NR_NB_SC_PER_RB)%frame_parms->ofdm_symbol_size;
   nb_re_pusch = NR_NB_SC_PER_RB * pusch_pdu->rb_size;
 #ifdef __AVX2__
@@ -338,64 +339,95 @@ void nr_ulsch_extract_rbs_single(int32_t **rxdataF,
   int nb_re_pusch2 = nb_re_pusch;
 #endif
 
-  for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
+  for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) 
+  {
 
     rxF       = (int16_t *)&rxdataF[aarx][soffset+(symbol * frame_parms->ofdm_symbol_size)];
     rxF_ext   = (int16_t *)&pusch_vars->rxdataF_ext[aarx][symbol * nb_re_pusch2]; // [hna] rxdataF_ext isn't contiguous in order to solve an alignment problem ib llr computation in case of mod_order = 4, 6
-
-    ul_ch0     = &pusch_vars->ul_ch_estimates[aarx][pusch_vars->dmrs_symbol*frame_parms->ofdm_symbol_size]; // update channel estimates if new dmrs symbol are available
-
-    ul_ch0_ext = &pusch_vars->ul_ch_estimates_ext[aarx][symbol*nb_re_pusch2];
-
-    n = 0;
-    k_prime = 0;
-    rxF_ext_index = 0;
-    ul_ch0_ext_index = 0;
-    ul_ch0_index = 0;
-
-    if (is_dmrs_symbol == 0) {
+    
+    if (is_dmrs_symbol == 0) 
+    {
       //
       //rxF[ ((start_re + re)*2)      % (frame_parms->ofdm_symbol_size*2)]);
-      if (start_re + nb_re_pusch < frame_parms->ofdm_symbol_size) {
+      if (start_re + nb_re_pusch < frame_parms->ofdm_symbol_size) 
+      {
         memcpy1((void*)rxF_ext,
                 (void*)&rxF[start_re*2],
                 nb_re_pusch*sizeof(int32_t));
-      } else {
-	int neg_length = frame_parms->ofdm_symbol_size-start_re;
-	int pos_length = nb_re_pusch-neg_length;
+      } 
+      else 
+      {
+        int neg_length = frame_parms->ofdm_symbol_size-start_re;
+        int pos_length = nb_re_pusch-neg_length;
+        memcpy1((void*)rxF_ext,(void*)&rxF[start_re*2],neg_length*sizeof(int32_t));
+        memcpy1((void*)&rxF_ext[2*neg_length],(void*)rxF,pos_length*sizeof(int32_t));
+      }
 
-	memcpy1((void*)rxF_ext,(void*)&rxF[start_re*2],neg_length*sizeof(int32_t));
-	memcpy1((void*)&rxF_ext[2*neg_length],(void*)rxF,pos_length*sizeof(int32_t));
+      #ifdef SUPPORT_PMI_MATRIC  
+      for (aatx = 0; aatx < pusch_pdu->nrOfLayers; aatx++)
+      #else
+      aatx = aarx;
+      #endif
+      {
+        ul_ch0     = &pusch_vars->ul_ch_estimates[aatx*frame_parms->nb_antennas_rx+aarx][pusch_vars->dmrs_symbol*frame_parms->ofdm_symbol_size]; // update channel estimates if new dmrs symbol are available
+        ul_ch0_ext = &pusch_vars->ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*nb_re_pusch2];
+  
+        memcpy1((void*)ul_ch0_ext,(void*)ul_ch0,nb_re_pusch*sizeof(int32_t));
       }
-      memcpy1((void*)ul_ch0_ext,(void*)ul_ch0,nb_re_pusch*sizeof(int32_t));
     }
-    else {
-      for (re = 0; re < nb_re_pusch; re++) {
-
-        is_dmrs_re = (re == get_dmrs_freq_idx_ul(n, k_prime, delta, pusch_pdu->dmrs_config_type));
-
-#ifdef DEBUG_RB_EXT
-        printf("re = %d, kprime %d, n %d, is_dmrs_symbol = %d, symbol = %d\n", re, k_prime, n, is_dmrs_symbol, symbol);
-#endif
-
-        /* save only data and respective channel estimates */
-        if (is_dmrs_re == 0) {
-          rxF_ext[rxF_ext_index]     = (rxF[ ((start_re + re)*2)      % (frame_parms->ofdm_symbol_size*2)]);
-          rxF_ext[rxF_ext_index + 1] = (rxF[(((start_re + re)*2) + 1) % (frame_parms->ofdm_symbol_size*2)]);
-          ul_ch0_ext[ul_ch0_ext_index] = ul_ch0[ul_ch0_index];
+    else 
+    { 
+      #ifdef SUPPORT_PMI_MATRIC  
+      for (aatx = 0; aatx < pusch_pdu->nrOfLayers; aatx++)
+      #else
+      aatx = aarx;
+      #endif      
+      {
+        ul_ch0     = &pusch_vars->ul_ch_estimates[aatx*frame_parms->nb_antennas_rx+aarx][pusch_vars->dmrs_symbol*frame_parms->ofdm_symbol_size]; // update channel estimates if new dmrs symbol are available
+        ul_ch0_ext = &pusch_vars->ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*nb_re_pusch2];
+
+        //n = 0;
+        //k_prime = 0;
+        rxF_ext_index = 0;
+        ul_ch0_ext_index = 0;
+        ul_ch0_index = 0;        
+        
+        for (re = 0; re < nb_re_pusch; re++) 
+        {  
+          uint16_t k= start_re+re;
+                
+          is_data_re = allowed_xlsch_re_in_dmrs_symbol(k, start_re, frame_parms->ofdm_symbol_size, pusch_pdu->num_dmrs_cdm_grps_no_data, pusch_pdu->dmrs_config_type);
+          
+          if (++k >= frame_parms->ofdm_symbol_size)
+          {
+            k -= frame_parms->ofdm_symbol_size;
+          }
 
-#ifdef DEBUG_RB_EXT
-          printf("dmrs symb %d: rxF_ext[%d] = (%d,%d), ul_ch0_ext[%d] = (%d,%d)\n",
+          #ifdef DEBUG_RB_EXT
+          printf("re = %d, kprime %d, n %d, is_dmrs_symbol = %d, symbol = %d\n", re, k_prime, n, is_dmrs_symbol, symbol);
+          #endif
+
+          /* save only data and respective channel estimates */
+          if (is_data_re == 1) 
+          { 
+            if (aatx == aarx)
+            {
+              rxF_ext[rxF_ext_index]     = (rxF[ ((start_re + re)*2)      % (frame_parms->ofdm_symbol_size*2)]);
+              rxF_ext[rxF_ext_index + 1] = (rxF[(((start_re + re)*2) + 1) % (frame_parms->ofdm_symbol_size*2)]);
+              rxF_ext_index +=2;
+            }
+          
+            ul_ch0_ext[ul_ch0_ext_index] = ul_ch0[ul_ch0_index];
+            ul_ch0_ext_index++;
+
+            #ifdef DEBUG_RB_EXT
+            printf("dmrs symb %d: rxF_ext[%d] = (%d,%d), ul_ch0_ext[%d] = (%d,%d)\n",
                  is_dmrs_symbol,rxF_ext_index>>1, rxF_ext[rxF_ext_index],rxF_ext[rxF_ext_index+1],
                  ul_ch0_ext_index,  ((int16_t*)&ul_ch0_ext[ul_ch0_ext_index])[0],  ((int16_t*)&ul_ch0_ext[ul_ch0_ext_index])[1]);
-#endif
-          ul_ch0_ext_index++;
-          rxF_ext_index +=2;
-        } else {
-          n += k_prime;
-          k_prime ^= 1;
+            #endif          
+          } 
+          ul_ch0_index++;
         }
-        ul_ch0_index++;
       }
     }
   }
@@ -406,16 +438,19 @@ void nr_ulsch_scale_channel(int **ul_ch_estimates_ext,
                             NR_gNB_ULSCH_t **ulsch_gNB,
                             uint8_t symbol,
                             uint8_t is_dmrs_symbol,
-                            unsigned short nb_rb,
-                            pusch_dmrs_type_t pusch_dmrs_type)
+                            uint32_t len,
+                            uint8_t nrOfLayers,
+                            unsigned short nb_rb)
 {
 
 #if defined(__x86_64__)||defined(__i386__)
 
   short rb, ch_amp;
-  unsigned char aarx;
+  unsigned char aarx,aatx;
   __m128i *ul_ch128, ch_amp128;
 
+  uint32_t nb_rb_0 = len/12 + ((len%12)?1:0);
+
   // Determine scaling amplitude based the symbol
 
   ch_amp = 1024*8; //((pilots) ? (ulsch_gNB[0]->sqrt_rho_b) : (ulsch_gNB[0]->sqrt_rho_a));
@@ -431,35 +466,26 @@ void nr_ulsch_scale_channel(int **ul_ch_estimates_ext,
   int off = 0;
 #endif
 
-  for (aarx=0; aarx < frame_parms->nb_antennas_rx; aarx++) {
-
-      ul_ch128 = (__m128i *)&ul_ch_estimates_ext[aarx][symbol*(off+(nb_rb*NR_NB_SC_PER_RB))];
-
-      if (is_dmrs_symbol==1){
-        if (pusch_dmrs_type == pusch_dmrs_type1)
-          nb_rb = nb_rb>>1;
-        else
-          nb_rb = (2*nb_rb)/3;
-      }
-
-      for (rb=0;rb<nb_rb;rb++) {
+for (aatx = 0; aatx < nrOfLayers; aatx++)
+{
+  for (aarx=0; aarx < frame_parms->nb_antennas_rx; aarx++) 
+  {
+      ul_ch128 = (__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*NR_NB_SC_PER_RB))];
 
+      for (rb=0;rb < nb_rb_0;rb++) 
+      {
         ul_ch128[0] = _mm_mulhi_epi16(ul_ch128[0], ch_amp128);
         ul_ch128[0] = _mm_slli_epi16(ul_ch128[0], 3);
 
         ul_ch128[1] = _mm_mulhi_epi16(ul_ch128[1], ch_amp128);
         ul_ch128[1] = _mm_slli_epi16(ul_ch128[1], 3);
 
-        if (is_dmrs_symbol) {
-          ul_ch128+=2;
-        } else {
-          ul_ch128[2] = _mm_mulhi_epi16(ul_ch128[2], ch_amp128);
-          ul_ch128[2] = _mm_slli_epi16(ul_ch128[2], 3);
-          ul_ch128+=3;
-
-        }
+        ul_ch128[2] = _mm_mulhi_epi16(ul_ch128[2], ch_amp128);
+        ul_ch128[2] = _mm_slli_epi16(ul_ch128[2], 3);
+        ul_ch128+=3;
       }
-    }
+  }
+}
 #endif
 }
 
@@ -481,6 +507,8 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
 
   int16_t x = factor2(len);
   int16_t y = (len)>>x;
+  
+  uint32_t nb_rb_0 = len/12 + ((len%12)?1:0);
 
 #ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
@@ -489,13 +517,16 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
 #endif
 
   for (aatx = 0; aatx < nrOfLayers; aatx++)
-    for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) {
+  {
+    for (aarx = 0; aarx < frame_parms->nb_antennas_rx; aarx++) 
+    {
       //clear average level
       avg128U = _mm_setzero_si128();
 
       ul_ch128=(__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
 
-      for (rb = 0; rb < len/12; rb++) {
+      for (rb = 0; rb < nb_rb_0; rb++) 
+	  {
         avg128U = _mm_add_epi32(avg128U, _mm_srai_epi32(_mm_madd_epi16(ul_ch128[0], ul_ch128[0]), x));
         avg128U = _mm_add_epi32(avg128U, _mm_srai_epi32(_mm_madd_epi16(ul_ch128[1], ul_ch128[1]), x));
         avg128U = _mm_add_epi32(avg128U, _mm_srai_epi32(_mm_madd_epi16(ul_ch128[2], ul_ch128[2]), x));
@@ -508,7 +539,8 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
                                                     ((int32_t*)&avg128U)[3]) / y;
 
     }
-
+  }
+   
   _mm_empty();
   _m_empty();
 
@@ -520,7 +552,7 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
   int16x4_t *ul_ch128;
 
   symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
-
+  uint32_t nb_rb_0 = len/12 + ((len%12)?1:0);
   for (aatx=0; aatx<nrOfLayers; aatx++) {
     for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
       //clear average level
@@ -529,7 +561,7 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
 
       ul_ch128 = (int16x4_t *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*frame_parms->N_RB_UL*12];
 
-      for (rb = 0; rb < nb_rb; rb++) {
+      for (rb = 0; rb < nb_rb_0; rb++) {
         //  printf("rb %d : ",rb);
         //  print_shorts("ch",&ul_ch128[0]);
         avg128U = vqaddq_s32(avg128U, vmull_s16(ul_ch128[0], ul_ch128[0]));
@@ -568,14 +600,20 @@ void nr_ulsch_channel_level(int **ul_ch_estimates_ext,
 #endif
 }
 
+
+
+//==============================================================================================
+// Pre-processing for LLR computation
+//==============================================================================================
 void nr_ulsch_channel_compensation(int **rxdataF_ext,
                                    int **ul_ch_estimates_ext,
                                    int **ul_ch_mag,
                                    int **ul_ch_magb,
                                    int **rxdataF_comp,
-                                   int **rho,
+                                   int ***rho,
                                    NR_DL_FRAME_PARMS *frame_parms,
                                    unsigned char symbol,
+                                   int length,
                                    uint8_t is_dmrs_symbol,
                                    unsigned char mod_order,
                                    uint8_t  nrOfLayers,
@@ -637,20 +675,24 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
   __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128={0},QAM_amp128b={0};
   QAM_amp128b = _mm_setzero_si128();
 
-  for (aatx=0; aatx<nrOfLayers; aatx++) {
-
-    if (mod_order == 4) {
+  uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
+  for (aatx=0; aatx<nrOfLayers; aatx++)
+  {
+    if (mod_order == 4) 
+    {
       QAM_amp128 = _mm_set1_epi16(QAM16_n1);  // 2/sqrt(10)
       QAM_amp128b = _mm_setzero_si128();
-    } else if (mod_order == 6) {
+    } 
+    else if (mod_order == 6)
+    {
       QAM_amp128  = _mm_set1_epi16(QAM64_n1); //
       QAM_amp128b = _mm_set1_epi16(QAM64_n2);
     }
 
     //    printf("comp: rxdataF_comp %p, symbol %d\n",rxdataF_comp[0],symbol);
 
-    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
-
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) 
+    {
       ul_ch128          = (__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
       ul_ch_mag128      = (__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
       ul_ch_mag128b     = (__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
@@ -658,8 +700,10 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
       rxdataF_comp128   = (__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
 
 
-      for (rb=0; rb<nb_rb; rb++) {
-        if (mod_order>2) {
+      for (rb=0; rb<nb_rb_0; rb++) 
+      {
+        if (mod_order>2) 
+        {
           // get channel amplitude if not QPSK
 
           //print_shorts("ch:",(int16_t*)&ul_ch128[0]);
@@ -679,6 +723,8 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
           ul_ch_mag128[0] = _mm_mulhi_epi16(ul_ch_mag128[0],QAM_amp128);
           ul_ch_mag128[0] = _mm_slli_epi16(ul_ch_mag128[0],1);
 
+          ul_ch_mag128b[0] = _mm_mulhi_epi16(ul_ch_mag128b[0],QAM_amp128b);
+          ul_ch_mag128b[0] = _mm_slli_epi16(ul_ch_mag128b[0],1);
           // print_ints("ch: = ",(int32_t*)&mmtmpD0);
           // print_shorts("QAM_amp:",(int16_t*)&QAM_amp128);
           // print_shorts("mag:",(int16_t*)&ul_ch_mag128[0]);
@@ -687,30 +733,24 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
           ul_ch_mag128b[1] = ul_ch_mag128[1];
           ul_ch_mag128[1] = _mm_mulhi_epi16(ul_ch_mag128[1],QAM_amp128);
           ul_ch_mag128[1] = _mm_slli_epi16(ul_ch_mag128[1],1);
+          
+          ul_ch_mag128b[1] = _mm_mulhi_epi16(ul_ch_mag128b[1],QAM_amp128);
+          ul_ch_mag128b[1] = _mm_slli_epi16(ul_ch_mag128b[1],1);
 
-          if (is_dmrs_symbol==0) {
-            mmtmpD0 = _mm_madd_epi16(ul_ch128[2],ul_ch128[2]);
-            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
-            mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0);
-
-            ul_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
-            ul_ch_mag128b[2] = ul_ch_mag128[2];
+          mmtmpD0 = _mm_madd_epi16(ul_ch128[2],ul_ch128[2]);
+          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+          mmtmpD1 = _mm_packs_epi32(mmtmpD0,mmtmpD0);
 
-            ul_ch_mag128[2] = _mm_mulhi_epi16(ul_ch_mag128[2],QAM_amp128);
-            ul_ch_mag128[2] = _mm_slli_epi16(ul_ch_mag128[2],1);
-          }
+          ul_ch_mag128[2] = _mm_unpacklo_epi16(mmtmpD1,mmtmpD1);
+          ul_ch_mag128b[2] = ul_ch_mag128[2];
 
-          ul_ch_mag128b[0] = _mm_mulhi_epi16(ul_ch_mag128b[0],QAM_amp128b);
-          ul_ch_mag128b[0] = _mm_slli_epi16(ul_ch_mag128b[0],1);
+          ul_ch_mag128[2] = _mm_mulhi_epi16(ul_ch_mag128[2],QAM_amp128);
+          ul_ch_mag128[2] = _mm_slli_epi16(ul_ch_mag128[2],1);
 
 
-          ul_ch_mag128b[1] = _mm_mulhi_epi16(ul_ch_mag128b[1],QAM_amp128b);
-          ul_ch_mag128b[1] = _mm_slli_epi16(ul_ch_mag128b[1],1);
+          ul_ch_mag128b[2] = _mm_mulhi_epi16(ul_ch_mag128b[2],QAM_amp128b);
+          ul_ch_mag128b[2] = _mm_slli_epi16(ul_ch_mag128b[2],1);
 
-          if (is_dmrs_symbol==0) {
-            ul_ch_mag128b[2] = _mm_mulhi_epi16(ul_ch_mag128b[2],QAM_amp128b);
-            ul_ch_mag128b[2] = _mm_slli_epi16(ul_ch_mag128b[2],1);
-          }
         }
 
         // multiply by conjugated channel
@@ -755,117 +795,157 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
         //  print_shorts("ch:",ul_ch128+1);
         //  print_shorts("pack:",rxdataF_comp128+1);
 
-        if (is_dmrs_symbol==0) {
-          // multiply by conjugated channel
-          mmtmpD0 = _mm_madd_epi16(ul_ch128[2],rxdataF128[2]);
-          // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
-          mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[2],_MM_SHUFFLE(2,3,0,1));
-          mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
-          mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
-          mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
-          // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
-          mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
-          mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
-          mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
-          mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
-
-          rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
-          //  print_shorts("rx:",rxdataF128+2);
-          //  print_shorts("ch:",ul_ch128+2);
-          //        print_shorts("pack:",rxdataF_comp128+2);
-
-          ul_ch128+=3;
-          ul_ch_mag128+=3;
-          ul_ch_mag128b+=3;
-          rxdataF128+=3;
-          rxdataF_comp128+=3;
-        } else { // we have a smaller PUSCH in symbols with pilots so skip last group of 4 REs and increment less
-          ul_ch128+=2;
-          ul_ch_mag128+=2;
-          ul_ch_mag128b+=2;
-          rxdataF128+=2;
-          rxdataF_comp128+=2;
-        }
-
-      }
-    }
-  }
-
-  if (rho) {
-
-    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
-      rho128        = (__m128i *)&rho[aarx][symbol*frame_parms->N_RB_UL*12];
-      ul_ch128      = (__m128i *)&ul_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_UL*12];
-      ul_ch128_2    = (__m128i *)&ul_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_UL*12];
-
-      for (rb=0; rb<nb_rb; rb++) {
-        // multiply by conjugated channel
-        mmtmpD0 = _mm_madd_epi16(ul_ch128[0],ul_ch128_2[0]);
-        //  print_ints("re",&mmtmpD0);
-
-        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
-        mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[0],_MM_SHUFFLE(2,3,0,1));
-        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
-        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
-        //  print_ints("im",&mmtmpD1);
-        mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[0]);
-        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
-        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
-        //  print_ints("re(shift)",&mmtmpD0);
-        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
-        //  print_ints("im(shift)",&mmtmpD1);
-        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
-        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
-        //        print_ints("c0",&mmtmpD2);
-        //  print_ints("c1",&mmtmpD3);
-        rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
-
-        //print_shorts("rx:",ul_ch128_2);
-        //print_shorts("ch:",ul_ch128);
-        //print_shorts("pack:",rho128);
-
-        // multiply by conjugated channel
-        mmtmpD0 = _mm_madd_epi16(ul_ch128[1],ul_ch128_2[1]);
-        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
-        mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[1],_MM_SHUFFLE(2,3,0,1));
-        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
-        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
-        mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[1]);
-        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
-        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
-        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
-        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
-        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
-
-
-        rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3);
-        //print_shorts("rx:",ul_ch128_2+1);
-        //print_shorts("ch:",ul_ch128+1);
-        //print_shorts("pack:",rho128+1);
         // multiply by conjugated channel
-        mmtmpD0 = _mm_madd_epi16(ul_ch128[2],ul_ch128_2[2]);
+        mmtmpD0 = _mm_madd_epi16(ul_ch128[2],rxdataF128[2]);
         // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
         mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[2],_MM_SHUFFLE(2,3,0,1));
         mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
         mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
-        mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[2]);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,rxdataF128[2]);
         // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
         mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
         mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
         mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
         mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
 
-        rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
-        //print_shorts("rx:",ul_ch128_2+2);
+        rxdataF_comp128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+        //print_shorts("rx:",rxdataF128+2);
         //print_shorts("ch:",ul_ch128+2);
-        //print_shorts("pack:",rho128+2);
+        //print_shorts("pack:",rxdataF_comp128+2);
 
         ul_ch128+=3;
-        ul_ch128_2+=3;
-        rho128+=3;
-
+        ul_ch_mag128+=3;
+        ul_ch_mag128b+=3;
+        rxdataF128+=3;
+        rxdataF_comp128+=3;
       }
+    }
+  }
 
+  if (rho) {
+    //we compute the Tx correlation matrix for each Rx antenna
+    //As an example the 2x2 MIMO case requires
+    //rho[aarx][nb_aatx*nb_aatx] = [cov(H_aarx_0,H_aarx_0) cov(H_aarx_0,H_aarx_1)
+    //                              cov(H_aarx_1,H_aarx_0) cov(H_aarx_1,H_aarx_1)], aarx=0,...,nb_antennas_rx-1
+
+    int avg_rho_re[frame_parms->nb_antennas_rx][nrOfLayers*nrOfLayers];
+    int avg_rho_im[frame_parms->nb_antennas_rx][nrOfLayers*nrOfLayers];
+
+    for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) 
+    {
+      for (aatx=0; aatx < nrOfLayers; aatx++) 
+      {
+        ul_ch128      = (__m128i *)&ul_ch_estimates_ext[aatx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
+      
+        for (int atx=0; atx< nrOfLayers; atx++) 
+        {
+          avg_rho_re[aarx][aatx*nrOfLayers+atx] = 0;
+          avg_rho_im[aarx][aatx*nrOfLayers+atx] = 0;
+          rho128        = (__m128i *)&rho[aarx][aatx*nrOfLayers+atx][symbol*(off+(nb_rb*12))];
+          ul_ch128_2    = (__m128i *)&ul_ch_estimates_ext[atx*frame_parms->nb_antennas_rx+aarx][symbol*(off+(nb_rb*12))];
+
+          for (rb=0; rb<nb_rb_0; rb++) 
+          {
+            // multiply by conjugated channel
+            mmtmpD0 = _mm_madd_epi16(ul_ch128[0],ul_ch128_2[0]);
+            //  print_ints("re",&mmtmpD0);
+
+            // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+            mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[0],_MM_SHUFFLE(2,3,0,1));
+            mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+            mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+            //  print_ints("im",&mmtmpD1);
+            mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[0]);
+            // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+            //  print_ints("re(shift)",&mmtmpD0);
+            mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+            //  print_ints("im(shift)",&mmtmpD1);
+            mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+            mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+            //        print_ints("c0",&mmtmpD2);
+            //  print_ints("c1",&mmtmpD3);
+            rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+            //print_shorts("rx:",ul_ch128_2);
+            //print_shorts("ch:",ul_ch128);
+            //print_shorts("pack:",rho128);
+
+            avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[0])[0]+
+              ((int16_t*)&rho128[0])[2] +
+              ((int16_t*)&rho128[0])[4] +
+              ((int16_t*)&rho128[0])[6])/16;//
+
+            avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[0])[1]+
+              ((int16_t*)&rho128[0])[3] +
+              ((int16_t*)&rho128[0])[5] +
+              ((int16_t*)&rho128[0])[7])/16;//
+            // multiply by conjugated channel
+            mmtmpD0 = _mm_madd_epi16(ul_ch128[1],ul_ch128_2[1]);
+            // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+            mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[1],_MM_SHUFFLE(2,3,0,1));
+            mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+            mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+            mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[1]);
+            // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+            mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+            mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+            mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+            rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3);
+            //print_shorts("rx:",ul_ch128_2+1);
+            //print_shorts("ch:",ul_ch128+1);
+            //print_shorts("pack:",rho128+1);
+
+            // multiply by conjugated channel
+            avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[1])[0]+
+              ((int16_t*)&rho128[1])[2] +
+              ((int16_t*)&rho128[1])[4] +
+              ((int16_t*)&rho128[1])[6])/16;
+
+            avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[1])[1]+
+              ((int16_t*)&rho128[1])[3] +
+              ((int16_t*)&rho128[1])[5] +
+              ((int16_t*)&rho128[1])[7])/16;
+
+            mmtmpD0 = _mm_madd_epi16(ul_ch128[2],ul_ch128_2[2]);
+            // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+            mmtmpD1 = _mm_shufflelo_epi16(ul_ch128[2],_MM_SHUFFLE(2,3,0,1));
+            mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+            mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+            mmtmpD1 = _mm_madd_epi16(mmtmpD1,ul_ch128_2[2]);
+            // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+            mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+            mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+            mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+            mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+            rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+            //print_shorts("rx:",ul_ch128_2+2);
+            //print_shorts("ch:",ul_ch128+2);
+            //print_shorts("pack:",rho128+2);
+            avg_rho_re[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[2])[0]+
+              ((int16_t*)&rho128[2])[2] +
+              ((int16_t*)&rho128[2])[4] +
+              ((int16_t*)&rho128[2])[6])/16;
+
+            avg_rho_im[aarx][aatx*nrOfLayers+atx] +=(((int16_t*)&rho128[2])[1]+
+              ((int16_t*)&rho128[2])[3] +
+              ((int16_t*)&rho128[2])[5] +
+              ((int16_t*)&rho128[2])[7])/16;
+
+            ul_ch128+=3;
+            ul_ch128_2+=3;
+            rho128+=3;
+          }
+          if (is_dmrs_symbol==1) {
+            //measurements->rx_correlation[0][0][aarx] = signal_energy(&rho[aarx][aatx*nb_aatx+atx][symbol*nb_rb*12],rb*12);
+            avg_rho_re[aarx][aatx*nrOfLayers+atx] = 16*avg_rho_re[aarx][aatx*nrOfLayers+atx]/(nb_rb*12);
+            avg_rho_im[aarx][aatx*nrOfLayers+atx] = 16*avg_rho_im[aarx][aatx*nrOfLayers+atx]/(nb_rb*12);
+            //printf("rho[rx]%d tx%d tx%d = Re: %d Im: %d\n",aarx, aatx,atx, avg_rho_re[aarx][aatx*nb_aatx+atx], avg_rho_im[aarx][aatx*nb_aatx+atx]);
+          }
+        }
+      }
     }
   }
 
@@ -1105,19 +1185,23 @@ void nr_ulsch_channel_compensation(int **rxdataF_ext,
 }
 
 void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
-			    int32_t **rxdataF_comp,
-			    int32_t **ul_ch_mag,
-			    int32_t **ul_ch_magb,
-			    uint8_t symbol,
-			    uint16_t nb_rb) {
+                int32_t **rxdataF_comp,
+                int32_t **ul_ch_mag,
+                int32_t **ul_ch_magb,
+                int32_t ***rho,                
+                uint8_t  nrOfLayers,
+                uint8_t symbol,
+                uint16_t nb_rb,
+                int length) {
   int n_rx = frame_parms->nb_antennas_rx;
 #if defined(__x86_64__) || defined(__i386__)
-  __m128i *rxdataF_comp128[1+n_rx],*ul_ch_mag128[1+n_rx],*ul_ch_mag128b[1+n_rx];
+  __m128i *rxdataF_comp128[2],*ul_ch_mag128[2],*ul_ch_mag128b[2];
 #elif defined(__arm__)
   int16x8_t *rxdataF_comp128_0,*ul_ch_mag128_0,*ul_ch_mag128_0b;
   int16x8_t *rxdataF_comp128_1,*ul_ch_mag128_1,*ul_ch_mag128_1b;
 #endif
   int32_t i;
+  uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
 
 #ifdef __AVX2__
   int off = ((nb_rb&1) == 1)? 4:0;
@@ -1125,23 +1209,34 @@ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
   int off = 0;
 #endif
 
-  if (frame_parms->nb_antennas_rx>1) {
-#if defined(__x86_64__) || defined(__i386__)
-    int nb_re = nb_rb*12;
-    for (int aa=0;aa<frame_parms->nb_antennas_rx;aa++) {
-      rxdataF_comp128[aa]   = (__m128i *)&rxdataF_comp[aa][(symbol*(nb_re + off))];
-      ul_ch_mag128[aa]      = (__m128i *)&ul_ch_mag[aa][(symbol*(nb_re + off))];
-      ul_ch_mag128b[aa]     = (__m128i *)&ul_ch_magb[aa][(symbol*(nb_re + off))];
-    }
-    for (int aa=1;aa<frame_parms->nb_antennas_rx;aa++) {      
-      // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation)
-      for (i=0; i<nb_rb*3; i++) {
-	rxdataF_comp128[0][i] = _mm_adds_epi16(rxdataF_comp128[0][i],rxdataF_comp128[aa][i]);
-	ul_ch_mag128[0][i]    = _mm_adds_epi16(ul_ch_mag128[0][i], ul_ch_mag128[aa][i]);
-	ul_ch_mag128b[0][i]   = _mm_adds_epi16(ul_ch_mag128b[0][i],ul_ch_mag128b[aa][i]);
+  if (n_rx > 1) 
+  {
+    #if defined(__x86_64__) || defined(__i386__)
+    for (int aatx=0; aatx<nrOfLayers; aatx++)
+    {      
+      int nb_re = nb_rb*12;
+
+      rxdataF_comp128[0]   = (__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
+      ul_ch_mag128[0]      = (__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
+      ul_ch_mag128b[0]     = (__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx][(symbol*(nb_re + off))];
+
+      for (int aa=1;aa < n_rx;aa++) 
+      {
+        rxdataF_comp128[1]   = (__m128i *)&rxdataF_comp[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
+        ul_ch_mag128[1]      = (__m128i *)&ul_ch_mag[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
+        ul_ch_mag128b[1]     = (__m128i *)&ul_ch_magb[aatx*frame_parms->nb_antennas_rx+aa][(symbol*(nb_re + off))];
+      
+        // MRC on each re of rb, both on MF output and magnitude (for 16QAM/64QAM llr computation)
+        for (i=0; i<nb_rb_0*3; i++) 
+        {
+            rxdataF_comp128[0][i] = _mm_adds_epi16(rxdataF_comp128[0][i],rxdataF_comp128[1][i]);
+            ul_ch_mag128[0][i]    = _mm_adds_epi16(ul_ch_mag128[0][i],ul_ch_mag128[1][i]);
+            ul_ch_mag128b[0][i]   = _mm_adds_epi16(ul_ch_mag128b[0][i],ul_ch_mag128b[1][i]);
+            //rxdataF_comp128[0][i] = _mm_add_epi16(rxdataF_comp128_0[i],(*(__m128i *)&jitterc[0]));
+        }
       }
     }
-#elif defined(__arm__)
+    #elif defined(__arm__)
     rxdataF_comp128_0   = (int16x8_t *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
     rxdataF_comp128_1   = (int16x8_t *)&rxdataF_comp[1][symbol*frame_parms->N_RB_DL*12];
     ul_ch_mag128_0      = (int16x8_t *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
@@ -1156,7 +1251,7 @@ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
       ul_ch_mag128_0b[i]   = vhaddq_s16(ul_ch_mag128_0b[i],ul_ch_mag128_1b[i]);
       rxdataF_comp128_0[i] = vqaddq_s16(rxdataF_comp128_0[i],(*(int16x8_t *)&jitterc[0]));
     }
-#endif
+    #endif
   }
 
 #if defined(__x86_64__) || defined(__i386__)
@@ -1165,6 +1260,696 @@ void nr_ulsch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
 #endif
 }
 
+/* Zero Forcing Rx function: nr_det_HhH()
+ *
+ *
+ * */
+void nr_ulsch_det_HhH(int32_t *after_mf_00,//a
+                int32_t *after_mf_01,//b
+                int32_t *after_mf_10,//c
+                int32_t *after_mf_11,//d
+                int32_t *det_fin,//1/ad-bc
+                unsigned short nb_rb,
+                unsigned char symbol,
+                int32_t shift)
+{
+  int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
+  unsigned short rb;
+  __m128i *after_mf_00_128,*after_mf_01_128, *after_mf_10_128, *after_mf_11_128, ad_re_128, bc_re_128; //ad_im_128, bc_im_128;
+  __m128i *det_fin_128, det_re_128; //det_im_128, tmp_det0, tmp_det1;
+
+  after_mf_00_128 = (__m128i *)after_mf_00;
+  after_mf_01_128 = (__m128i *)after_mf_01;
+  after_mf_10_128 = (__m128i *)after_mf_10;
+  after_mf_11_128 = (__m128i *)after_mf_11;
+
+  det_fin_128 = (__m128i *)det_fin;
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+
+    //complex multiplication (I_a+jQ_a)(I_d+jQ_d) = (I_aI_d - Q_aQ_d) + j(Q_aI_d + I_aQ_d)
+    //The imag part is often zero, we compute only the real part
+    ad_re_128 = _mm_sign_epi16(after_mf_00_128[0],*(__m128i*)&nr_conjug2[0]);
+    ad_re_128 = _mm_madd_epi16(ad_re_128,after_mf_11_128[0]); //Re: I_a0*I_d0 - Q_a1*Q_d1
+    //ad_im_128 = _mm_shufflelo_epi16(after_mf_00_128[0],_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
+    //ad_im_128 = _mm_shufflehi_epi16(ad_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
+    //ad_im_128 = _mm_madd_epi16(ad_im_128,after_mf_11_128[0]);//Im: (Q_aI_d + I_aQ_d)
+
+    //complex multiplication (I_b+jQ_b)(I_c+jQ_c) = (I_bI_c - Q_bQ_c) + j(Q_bI_c + I_bQ_c)
+    //The imag part is often zero, we compute only the real part
+    bc_re_128 = _mm_sign_epi16(after_mf_01_128[0],*(__m128i*)&nr_conjug2[0]);
+    bc_re_128 = _mm_madd_epi16(bc_re_128,after_mf_10_128[0]); //Re: I_b0*I_c0 - Q_b1*Q_c1
+    //bc_im_128 = _mm_shufflelo_epi16(after_mf_01_128[0],_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_b0 Q_b1 I_b2 Q_b3]_64bits to [Q_b1 I_b0 Q_b3 I_b2]_64bits
+    //bc_im_128 = _mm_shufflehi_epi16(bc_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_b0 Q_b1 I_b2 Q_b3]_64bits to [Q_b1 I_b0 Q_b3 I_b2]_64bits
+    //bc_im_128 = _mm_madd_epi16(bc_im_128,after_mf_10_128[0]);//Im: (Q_bI_c + I_bQ_c)
+
+    det_re_128 = _mm_sub_epi32(ad_re_128, bc_re_128);
+    //det_im_128 = _mm_sub_epi32(ad_im_128, bc_im_128);
+
+    //det in Q30 format
+    det_fin_128[0] = _mm_abs_epi32(det_re_128);
+
+
+#ifdef DEBUG_DLSCH_DEMOD
+     printf("\n Computing det_HhH_inv \n");
+     //print_ints("det_re_128:",(int32_t*)&det_re_128);
+     //print_ints("det_im_128:",(int32_t*)&det_im_128);
+     print_ints("det_fin_128:",(int32_t*)&det_fin_128[0]);
+#endif
+    det_fin_128+=1;
+    after_mf_00_128+=1;
+    after_mf_01_128+=1;
+    after_mf_10_128+=1;
+    after_mf_11_128+=1;
+  }
+  _mm_empty();
+  _m_empty();
+}
+
+/* Zero Forcing Rx function: nr_inv_comp_muli
+ * Complex number multi: z = x*y
+ *                         = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
+ * */
+__m128i nr_ulsch_inv_comp_muli(__m128i input_x,
+                         __m128i input_y)
+{
+  int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
+
+  __m128i xy_re_128, xy_im_128;
+  __m128i output_z, tmp_z0, tmp_z1;
+
+  // complex multiplication (x_re + jx_im)*(y_re + jy_im) = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
+
+  // the real part
+  xy_re_128 = _mm_sign_epi16(input_x,*(__m128i*)&nr_conjug2[0]);
+  xy_re_128 = _mm_madd_epi16(xy_re_128,input_y); //Re: (x_re*y_re - x_im*y_im)
+
+  // the imag part
+  xy_im_128 = _mm_shufflelo_epi16(input_x,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
+  xy_im_128 = _mm_shufflehi_epi16(xy_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
+  xy_im_128 = _mm_madd_epi16(xy_im_128,input_y);//Im: (x_im*y_re + x_re*y_im)
+
+  //convert back to Q15 before packing
+  xy_re_128 = _mm_srai_epi32(xy_re_128,4);//(2^15/64*2*16)
+  xy_im_128 = _mm_srai_epi32(xy_im_128,4);
+
+  tmp_z0  = _mm_unpacklo_epi32(xy_re_128,xy_im_128);
+  //print_ints("unpack lo:",&tmp_z0[0]);
+  tmp_z1  = _mm_unpackhi_epi32(xy_re_128,xy_im_128);
+  //print_ints("unpack hi:",&tmp_z1[0]);
+  output_z = _mm_packs_epi32(tmp_z0,tmp_z1);
+
+  _mm_empty();
+  _m_empty();
+  return(output_z);
+}
+
+/* Zero Forcing Rx function: nr_conjch0_mult_ch1()
+ *
+ *
+ * */
+void nr_ulsch_conjch0_mult_ch1(int *ch0,
+                         int *ch1,
+                         int32_t *ch0conj_ch1,
+                         unsigned short nb_rb,
+                         unsigned char output_shift0)
+{
+  //This function is used to compute multiplications in H_hermitian * H matrix
+  short nr_conjugate[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1};
+  unsigned short rb;
+  __m128i *dl_ch0_128,*dl_ch1_128, *ch0conj_ch1_128, mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3;
+
+  dl_ch0_128 = (__m128i *)ch0;
+  dl_ch1_128 = (__m128i *)ch1;
+
+  ch0conj_ch1_128 = (__m128i *)ch0conj_ch1;
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+
+    mmtmpD0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch1_128[0]);
+    mmtmpD1 = _mm_shufflelo_epi16(dl_ch0_128[0],_MM_SHUFFLE(2,3,0,1));
+    mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+    mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&nr_conjugate[0]);
+    mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch1_128[0]);
+    mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift0);
+    mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift0);
+    mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+    mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+
+    ch0conj_ch1_128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+    /*printf("\n Computing conjugates \n");
+    print_shorts("ch0:",(int16_t*)&dl_ch0_128[0]);
+    print_shorts("ch1:",(int16_t*)&dl_ch1_128[0]);
+    print_shorts("pack:",(int16_t*)&ch0conj_ch1_128[0]);*/
+
+    dl_ch0_128+=1;
+    dl_ch1_128+=1;
+    ch0conj_ch1_128+=1;
+  }
+  _mm_empty();
+  _m_empty();
+}
+__m128i nr_ulsch_comp_muli_sum(__m128i input_x,
+                         __m128i input_y,
+                         __m128i input_w,
+                         __m128i input_z,
+                         __m128i det)
+{
+  int16_t nr_conjug2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1} ;
+
+  __m128i xy_re_128, xy_im_128, wz_re_128, wz_im_128;
+  __m128i output, tmp_z0, tmp_z1;
+
+  // complex multiplication (x_re + jx_im)*(y_re + jy_im) = (x_re*y_re - x_im*y_im) + j(x_im*y_re + x_re*y_im)
+  // the real part
+  xy_re_128 = _mm_sign_epi16(input_x,*(__m128i*)&nr_conjug2[0]);
+  xy_re_128 = _mm_madd_epi16(xy_re_128,input_y); //Re: (x_re*y_re - x_im*y_im)
+
+  // the imag part
+  xy_im_128 = _mm_shufflelo_epi16(input_x,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
+  xy_im_128 = _mm_shufflehi_epi16(xy_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
+  xy_im_128 = _mm_madd_epi16(xy_im_128,input_y);//Im: (x_im*y_re + x_re*y_im)
+
+  // complex multiplication (w_re + jw_im)*(z_re + jz_im) = (w_re*z_re - w_im*z_im) + j(w_im*z_re + w_re*z_im)
+  // the real part
+  wz_re_128 = _mm_sign_epi16(input_w,*(__m128i*)&nr_conjug2[0]);
+  wz_re_128 = _mm_madd_epi16(wz_re_128,input_z); //Re: (w_re*z_re - w_im*z_im)
+
+  // the imag part
+  wz_im_128 = _mm_shufflelo_epi16(input_w,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the low 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
+  wz_im_128 = _mm_shufflehi_epi16(wz_im_128,_MM_SHUFFLE(2,3,0,1));//permutes IQs for the high 64 bits as [I_a0 Q_a1 I_a2 Q_a3]_64bits to [Q_a1 I_a0 Q_a3 I_a2]_64bits
+  wz_im_128 = _mm_madd_epi16(wz_im_128,input_z);//Im: (w_im*z_re + w_re*z_im)
+
+
+  xy_re_128 = _mm_sub_epi32(xy_re_128, wz_re_128);
+  xy_im_128 = _mm_sub_epi32(xy_im_128, wz_im_128);
+  //print_ints("rx_re:",(int32_t*)&xy_re_128[0]);
+  //print_ints("rx_Img:",(int32_t*)&xy_im_128[0]);
+  //divide by matrix det and convert back to Q15 before packing
+  int sum_det =0;
+  for (int k=0; k<4;k++) {
+    sum_det += ((((int *)&det[0])[k])>>2);
+    //printf("det_%d = %d log2 =%d \n",k,(((int *)&det[0])[k]),log2_approx(((int *)&det[0])[k]));
+    }
+
+  xy_re_128 = _mm_slli_epi32(xy_re_128,5);
+  xy_re_128 = _mm_srai_epi32(xy_re_128,log2_approx(sum_det));
+  xy_re_128 = _mm_slli_epi32(xy_re_128,5);
+
+  xy_im_128 = _mm_slli_epi32(xy_im_128,5);
+  xy_im_128 = _mm_srai_epi32(xy_im_128,log2_approx(sum_det));
+  xy_im_128 = _mm_slli_epi32(xy_im_128,5);
+
+  tmp_z0  = _mm_unpacklo_epi32(xy_re_128,xy_im_128);
+  //print_ints("unpack lo:",&tmp_z0[0]);
+  tmp_z1  = _mm_unpackhi_epi32(xy_re_128,xy_im_128);
+  //print_ints("unpack hi:",&tmp_z1[0]);
+  output = _mm_packs_epi32(tmp_z0,tmp_z1);
+
+  _mm_empty();
+  _m_empty();
+  return(output);
+}
+/* Zero Forcing Rx function: nr_construct_HhH_elements()
+ *
+ *
+ * */
+void nr_ulsch_construct_HhH_elements(int *conjch00_ch00,
+                               int *conjch01_ch01,
+                               int *conjch11_ch11,
+                               int *conjch10_ch10,//
+                               int *conjch20_ch20,
+                               int *conjch21_ch21,
+                               int *conjch30_ch30,
+                               int *conjch31_ch31,
+                               int *conjch00_ch01,//00_01
+                               int *conjch01_ch00,//01_00
+                               int *conjch10_ch11,//10_11
+                               int *conjch11_ch10,//11_10
+                               int *conjch20_ch21,
+                               int *conjch21_ch20,
+                               int *conjch30_ch31,
+                               int *conjch31_ch30,
+                               int32_t *after_mf_00,
+                               int32_t *after_mf_01,
+                               int32_t *after_mf_10,
+                               int32_t *after_mf_11,
+                               unsigned short nb_rb,
+                               unsigned char symbol)
+{
+  //This function is used to construct the (H_hermitian * H matrix) matrix elements
+  unsigned short rb;
+  __m128i *conjch00_ch00_128, *conjch01_ch01_128, *conjch11_ch11_128, *conjch10_ch10_128;
+  __m128i *conjch20_ch20_128, *conjch21_ch21_128, *conjch30_ch30_128, *conjch31_ch31_128;
+  __m128i *conjch00_ch01_128, *conjch01_ch00_128, *conjch10_ch11_128, *conjch11_ch10_128;
+  __m128i *conjch20_ch21_128, *conjch21_ch20_128, *conjch30_ch31_128, *conjch31_ch30_128;
+  __m128i *after_mf_00_128, *after_mf_01_128, *after_mf_10_128, *after_mf_11_128;
+
+  conjch00_ch00_128 = (__m128i *)conjch00_ch00;
+  conjch01_ch01_128 = (__m128i *)conjch01_ch01;
+  conjch11_ch11_128 = (__m128i *)conjch11_ch11;
+  conjch10_ch10_128 = (__m128i *)conjch10_ch10;
+
+  conjch20_ch20_128 = (__m128i *)conjch20_ch20;
+  conjch21_ch21_128 = (__m128i *)conjch21_ch21;
+  conjch30_ch30_128 = (__m128i *)conjch30_ch30;
+  conjch31_ch31_128 = (__m128i *)conjch31_ch31;
+
+  conjch00_ch01_128 = (__m128i *)conjch00_ch01;
+  conjch01_ch00_128 = (__m128i *)conjch01_ch00;
+  conjch10_ch11_128 = (__m128i *)conjch10_ch11;
+  conjch11_ch10_128 = (__m128i *)conjch11_ch10;
+
+  conjch20_ch21_128 = (__m128i *)conjch20_ch21;
+  conjch21_ch20_128 = (__m128i *)conjch21_ch20;
+  conjch30_ch31_128 = (__m128i *)conjch30_ch31;
+  conjch31_ch30_128 = (__m128i *)conjch31_ch30;
+
+  after_mf_00_128 = (__m128i *)after_mf_00;
+  after_mf_01_128 = (__m128i *)after_mf_01;
+  after_mf_10_128 = (__m128i *)after_mf_10;
+  after_mf_11_128 = (__m128i *)after_mf_11;
+
+  for (rb=0; rb<3*nb_rb; rb++) {
+
+    after_mf_00_128[0] =_mm_adds_epi16(conjch00_ch00_128[0],conjch10_ch10_128[0]);//00_00 + 10_10
+    if (conjch20_ch20 != NULL) after_mf_00_128[0] =_mm_adds_epi16(after_mf_00_128[0],conjch20_ch20_128[0]);
+    if (conjch30_ch30 != NULL) after_mf_00_128[0] =_mm_adds_epi16(after_mf_00_128[0],conjch30_ch30_128[0]);
+
+    after_mf_11_128[0] =_mm_adds_epi16(conjch01_ch01_128[0], conjch11_ch11_128[0]); //01_01 + 11_11
+    if (conjch21_ch21 != NULL) after_mf_11_128[0] =_mm_adds_epi16(after_mf_11_128[0],conjch21_ch21_128[0]);
+    if (conjch31_ch31 != NULL) after_mf_11_128[0] =_mm_adds_epi16(after_mf_11_128[0],conjch31_ch31_128[0]);
+
+    after_mf_01_128[0] =_mm_adds_epi16(conjch00_ch01_128[0], conjch10_ch11_128[0]);//00_01 + 10_11
+    if (conjch20_ch21 != NULL) after_mf_01_128[0] =_mm_adds_epi16(after_mf_01_128[0],conjch20_ch21_128[0]);
+    if (conjch30_ch31 != NULL) after_mf_01_128[0] =_mm_adds_epi16(after_mf_01_128[0],conjch30_ch31_128[0]);
+
+    after_mf_10_128[0] =_mm_adds_epi16(conjch01_ch00_128[0], conjch11_ch10_128[0]);//01_00 + 11_10
+    if (conjch21_ch20 != NULL) after_mf_10_128[0] =_mm_adds_epi16(after_mf_10_128[0],conjch21_ch20_128[0]);
+    if (conjch31_ch30 != NULL) after_mf_10_128[0] =_mm_adds_epi16(after_mf_10_128[0],conjch31_ch30_128[0]);
+
+#ifdef DEBUG_DLSCH_DEMOD
+    if ((rb<=30))
+    {
+      printf(" \n construct_HhH_elements \n");
+      print_shorts("after_mf_00_128:",(int16_t*)&after_mf_00_128[0]);
+      print_shorts("after_mf_01_128:",(int16_t*)&after_mf_01_128[0]);
+      print_shorts("after_mf_10_128:",(int16_t*)&after_mf_10_128[0]);
+      print_shorts("after_mf_11_128:",(int16_t*)&after_mf_11_128[0]);
+    }
+#endif
+    conjch00_ch00_128+=1;
+    conjch10_ch10_128+=1;
+    conjch01_ch01_128+=1;
+    conjch11_ch11_128+=1;
+
+    if (conjch20_ch20 != NULL) conjch20_ch20_128+=1;
+    if (conjch21_ch21 != NULL) conjch21_ch21_128+=1;
+    if (conjch30_ch30 != NULL) conjch30_ch30_128+=1;
+    if (conjch31_ch31 != NULL) conjch31_ch31_128+=1;
+
+    conjch00_ch01_128+=1;
+    conjch01_ch00_128+=1;
+    conjch10_ch11_128+=1;
+    conjch11_ch10_128+=1;
+
+    if (conjch20_ch21 != NULL) conjch20_ch21_128+=1;
+    if (conjch21_ch20 != NULL) conjch21_ch20_128+=1;
+    if (conjch30_ch31 != NULL) conjch30_ch31_128+=1;
+    if (conjch31_ch30 != NULL) conjch31_ch30_128+=1;
+
+    after_mf_00_128 += 1;
+    after_mf_01_128 += 1;
+    after_mf_10_128 += 1;
+    after_mf_11_128 += 1;
+  }
+  _mm_empty();
+  _m_empty();
+}
+
+/* Zero Forcing Rx function: nr_ulsch_zero_forcing_rx_2layers()
+ *
+ *
+ * */
+uint8_t nr_ulsch_zero_forcing_rx_2layers(int **rxdataF_comp,
+                                   int **ul_ch_mag,
+                                   int **ul_ch_magb,                                   
+                                   int **ul_ch_estimates_ext,
+                                   unsigned short nb_rb,
+                                   unsigned char n_rx,
+                                   unsigned char mod_order,
+                                   int shift,
+                                   unsigned char symbol,
+                                   int length)
+{
+  int *ch00, *ch01, *ch10, *ch11;
+  int *ch20, *ch30, *ch21, *ch31;
+  uint32_t nb_rb_0 = length/12 + ((length%12)?1:0);
+
+  #ifdef __AVX2__
+  int off = ((nb_rb&1) == 1)? 4:0;
+  #else
+  int off = 0;
+  #endif
+
+  /* we need at least alignment to 16 bytes, let's put 32 to be sure
+   * (maybe not necessary but doesn't hurt)
+   */
+  int32_t conjch00_ch01[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch01_ch00[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch10_ch11[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch11_ch10[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch00_ch00[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch01_ch01[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch10_ch10[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch11_ch11[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch20_ch20[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch21_ch21[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch30_ch30[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch31_ch31[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch20_ch21[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch30_ch31[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch21_ch20[12*nb_rb] __attribute__((aligned(32)));
+  int32_t conjch31_ch30[12*nb_rb] __attribute__((aligned(32)));
+
+  int32_t af_mf_00[12*nb_rb] __attribute__((aligned(32)));
+  int32_t af_mf_01[12*nb_rb] __attribute__((aligned(32)));
+  int32_t af_mf_10[12*nb_rb] __attribute__((aligned(32)));
+  int32_t af_mf_11[12*nb_rb] __attribute__((aligned(32)));
+  int32_t determ_fin[12*nb_rb] __attribute__((aligned(32)));
+
+  switch (n_rx) {
+    case 2://
+      ch00 = (int *)&ul_ch_estimates_ext[0][symbol*(off+nb_rb*12)];
+      ch01 = (int *)&ul_ch_estimates_ext[2][symbol*(off+nb_rb*12)];
+      ch10 = (int *)&ul_ch_estimates_ext[1][symbol*(off+nb_rb*12)];
+      ch11 = (int *)&ul_ch_estimates_ext[3][symbol*(off+nb_rb*12)];
+      ch20 = NULL;
+      ch21 = NULL;
+      ch30 = NULL;
+      ch31 = NULL;
+      break;
+
+    case 4://
+      ch00 = (int *)&ul_ch_estimates_ext[0][symbol*(off+nb_rb*12)];
+      ch01 = (int *)&ul_ch_estimates_ext[4][symbol*(off+nb_rb*12)];
+      ch10 = (int *)&ul_ch_estimates_ext[1][symbol*(off+nb_rb*12)];
+      ch11 = (int *)&ul_ch_estimates_ext[5][symbol*(off+nb_rb*12)];
+      ch20 = (int *)&ul_ch_estimates_ext[2][symbol*(off+nb_rb*12)];
+      ch21 = (int *)&ul_ch_estimates_ext[6][symbol*(off+nb_rb*12)];
+      ch30 = (int *)&ul_ch_estimates_ext[3][symbol*(off+nb_rb*12)];
+      ch31 = (int *)&ul_ch_estimates_ext[7][symbol*(off+nb_rb*12)];
+      break;
+
+    default:
+      return -1;
+      break;
+  }
+
+  /* 1- Compute the rx channel matrix after compensation: (1/2^log2_max)x(H_herm x H)
+   * for n_rx = 2
+   * |conj_H_00       conj_H_10|    | H_00         H_01|   |(conj_H_00xH_00+conj_H_10xH_10)   (conj_H_00xH_01+conj_H_10xH_11)|
+   * |                         |  x |                  | = |                                                                 |
+   * |conj_H_01       conj_H_11|    | H_10         H_11|   |(conj_H_01xH_00+conj_H_11xH_10)   (conj_H_01xH_01+conj_H_11xH_11)|
+   *
+   */
+
+  if (n_rx>=2){
+    // (1/2^log2_maxh)*conj_H_00xH_00: (1/(64*2))conjH_00*H_00*2^15
+    nr_ulsch_conjch0_mult_ch1(ch00,
+                        ch00,
+                        conjch00_ch00,
+                        nb_rb_0,
+                        shift);
+    // (1/2^log2_maxh)*conj_H_10xH_10: (1/(64*2))conjH_10*H_10*2^15
+    nr_ulsch_conjch0_mult_ch1(ch10,
+                        ch10,
+                        conjch10_ch10,
+                        nb_rb_0,
+                        shift);
+    // conj_H_00xH_01
+    nr_ulsch_conjch0_mult_ch1(ch00,
+                        ch01,
+                        conjch00_ch01,
+                        nb_rb_0,
+                        shift); // this shift is equal to the channel level log2_maxh
+    // conj_H_10xH_11
+    nr_ulsch_conjch0_mult_ch1(ch10,
+                        ch11,
+                        conjch10_ch11,
+                        nb_rb_0,
+                        shift);
+    // conj_H_01xH_01
+    nr_ulsch_conjch0_mult_ch1(ch01,
+                        ch01,
+                        conjch01_ch01,
+                        nb_rb_0,
+                        shift);
+    // conj_H_11xH_11
+    nr_ulsch_conjch0_mult_ch1(ch11,
+                        ch11,
+                        conjch11_ch11,
+                        nb_rb_0,
+                        shift);
+    // conj_H_01xH_00
+    nr_ulsch_conjch0_mult_ch1(ch01,
+                        ch00,
+                        conjch01_ch00,
+                        nb_rb_0,
+                        shift);
+    // conj_H_11xH_10
+    nr_ulsch_conjch0_mult_ch1(ch11,
+                        ch10,
+                        conjch11_ch10,
+                        nb_rb_0,
+                        shift);
+  }
+  if (n_rx==4){
+    // (1/2^log2_maxh)*conj_H_20xH_20: (1/(64*2*16))conjH_20*H_20*2^15
+    nr_ulsch_conjch0_mult_ch1(ch20,
+                        ch20,
+                        conjch20_ch20,
+                        nb_rb_0,
+                        shift);
+
+    // (1/2^log2_maxh)*conj_H_30xH_30: (1/(64*2*4))conjH_30*H_30*2^15
+    nr_ulsch_conjch0_mult_ch1(ch30,
+                        ch30,
+                        conjch30_ch30,
+                        nb_rb_0,
+                        shift);
+
+    // (1/2^log2_maxh)*conj_H_20xH_20: (1/(64*2))conjH_20*H_20*2^15
+    nr_ulsch_conjch0_mult_ch1(ch20,
+                        ch21,
+                        conjch20_ch21,
+                        nb_rb_0,
+                        shift);
+
+    nr_ulsch_conjch0_mult_ch1(ch30,
+                        ch31,
+                        conjch30_ch31,
+                        nb_rb_0,
+                        shift);
+
+    nr_ulsch_conjch0_mult_ch1(ch21,
+                        ch21,
+                        conjch21_ch21,
+                        nb_rb_0,
+                        shift);
+
+    nr_ulsch_conjch0_mult_ch1(ch31,
+                        ch31,
+                        conjch31_ch31,
+                        nb_rb_0,
+                        shift);
+
+    // (1/2^log2_maxh)*conj_H_20xH_20: (1/(64*2))conjH_20*H_20*2^15
+    nr_ulsch_conjch0_mult_ch1(ch21,
+                        ch20,
+                        conjch21_ch20,
+                        nb_rb_0,
+                        shift);
+
+    nr_ulsch_conjch0_mult_ch1(ch31,
+                        ch30,
+                        conjch31_ch30,
+                        nb_rb_0,
+                        shift);
+
+    nr_ulsch_construct_HhH_elements(conjch00_ch00,
+                              conjch01_ch01,
+                              conjch11_ch11,
+                              conjch10_ch10,//
+                              conjch20_ch20,
+                              conjch21_ch21,
+                              conjch30_ch30,
+                              conjch31_ch31,
+                              conjch00_ch01,
+                              conjch01_ch00,
+                              conjch10_ch11,
+                              conjch11_ch10,//
+                              conjch20_ch21,
+                              conjch21_ch20,
+                              conjch30_ch31,
+                              conjch31_ch30,
+                              af_mf_00,
+                              af_mf_01,
+                              af_mf_10,
+                              af_mf_11,
+                              nb_rb_0,
+                              symbol);
+  }
+  if (n_rx==2){
+    nr_ulsch_construct_HhH_elements(conjch00_ch00,
+                              conjch01_ch01,
+                              conjch11_ch11,
+                              conjch10_ch10,//
+                              NULL,
+                              NULL,
+                              NULL,
+                              NULL,
+                              conjch00_ch01,
+                              conjch01_ch00,
+                              conjch10_ch11,
+                              conjch11_ch10,//
+                              NULL,
+                              NULL,
+                              NULL,
+                              NULL,
+                              af_mf_00,
+                              af_mf_01,
+                              af_mf_10,
+                              af_mf_11,
+                              nb_rb_0,
+                              symbol);
+  }
+  //det_HhH = ad -bc
+  nr_ulsch_det_HhH(af_mf_00,//a
+             af_mf_01,//b
+             af_mf_10,//c
+             af_mf_11,//d
+             determ_fin,
+             nb_rb_0,
+             symbol,
+             shift);
+  /* 2- Compute the channel matrix inversion **********************************
+   *
+     *    |(conj_H_00xH_00+conj_H_10xH_10)   (conj_H_00xH_01+conj_H_10xH_11)|
+     * A= |                                                                 |
+     *    |(conj_H_01xH_00+conj_H_11xH_10)   (conj_H_01xH_01+conj_H_11xH_11)|
+     *
+     *
+     *
+     *inv(A) =(1/det)*[d  -b
+     *                 -c  a]
+     *
+     *
+     **************************************************************************/
+  __m128i *rxdataF_comp128_0,*rxdataF_comp128_1,*ul_ch_mag128_0=NULL,*ul_ch_mag128b_0=NULL,*determ_fin_128;//*dl_ch_mag128_1,*dl_ch_mag128b_1,*dl_ch_mag128r_1
+  __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3;
+  __m128i *after_mf_a_128,*after_mf_b_128, *after_mf_c_128, *after_mf_d_128;
+  __m128i QAM_amp128={0},QAM_amp128b={0};
+
+  determ_fin_128      = (__m128i *)&determ_fin[0];
+
+  rxdataF_comp128_0   = (__m128i *)&rxdataF_comp[0][symbol*(off+nb_rb*12)];//aatx=0 @ aarx =0
+  rxdataF_comp128_1   = (__m128i *)&rxdataF_comp[n_rx][symbol*(off+nb_rb*12)];//aatx=1 @ aarx =0
+
+  after_mf_a_128 = (__m128i *)af_mf_00;
+  after_mf_b_128 = (__m128i *)af_mf_01;
+  after_mf_c_128 = (__m128i *)af_mf_10;
+  after_mf_d_128 = (__m128i *)af_mf_11;
+
+  if (mod_order>2) {
+    if (mod_order == 4) {
+      QAM_amp128 = _mm_set1_epi16(QAM16_n1);  //2/sqrt(10)
+      QAM_amp128b = _mm_setzero_si128();
+    } else if (mod_order == 6) {
+      QAM_amp128  = _mm_set1_epi16(QAM64_n1); //4/sqrt{42}
+      QAM_amp128b = _mm_set1_epi16(QAM64_n2); //2/sqrt{42}
+    } 
+    ul_ch_mag128_0      = (__m128i *)&ul_ch_mag[0][symbol*(off+nb_rb*12)];
+    ul_ch_mag128b_0     = (__m128i *)&ul_ch_magb[0][symbol*(off+nb_rb*12)];
+  }
+
+  for (int rb=0; rb<3*nb_rb_0; rb++) {
+    if (mod_order>2) {
+      int sum_det =0;
+      for (int k=0; k<4;k++) {
+        sum_det += ((((int *)&determ_fin_128[0])[k])>>2);
+        //printf("det_%d = %d\n",k,sum_det);
+        }
+
+      mmtmpD2 = _mm_slli_epi32(determ_fin_128[0],5);
+      mmtmpD2 = _mm_srai_epi32(mmtmpD2,log2_approx(sum_det));
+      mmtmpD2 = _mm_slli_epi32(mmtmpD2,5);
+
+      mmtmpD3 = _mm_unpacklo_epi32(mmtmpD2,mmtmpD2);
+
+      mmtmpD2 = _mm_unpackhi_epi32(mmtmpD2,mmtmpD2);
+
+      mmtmpD2 = _mm_packs_epi32(mmtmpD3,mmtmpD2);
+
+      ul_ch_mag128_0[0] = mmtmpD2;
+      ul_ch_mag128b_0[0] = mmtmpD2;
+
+      ul_ch_mag128_0[0] = _mm_mulhi_epi16(ul_ch_mag128_0[0],QAM_amp128);
+      ul_ch_mag128_0[0] = _mm_slli_epi16(ul_ch_mag128_0[0],1);
+
+      ul_ch_mag128b_0[0] = _mm_mulhi_epi16(ul_ch_mag128b_0[0],QAM_amp128b);
+      ul_ch_mag128b_0[0] = _mm_slli_epi16(ul_ch_mag128b_0[0],1);
+
+      //print_shorts("mag layer 1:",(int16_t*)&dl_ch_mag128_0[0]);
+      //print_shorts("mag layer 2:",(int16_t*)&dl_ch_mag128_1[0]);
+      //print_shorts("magb layer 1:",(int16_t*)&dl_ch_mag128b_0[0]);
+      //print_shorts("magb layer 2:",(int16_t*)&dl_ch_mag128b_1[0]);
+      //print_shorts("magr layer 1:",(int16_t*)&dl_ch_mag128r_0[0]);
+      //print_shorts("magr layer 2:",(int16_t*)&dl_ch_mag128r_1[0]);
+    }
+    // multiply by channel Inv
+    //rxdataF_zf128_0 = rxdataF_comp128_0*d - b*rxdataF_comp128_1
+    //rxdataF_zf128_1 = rxdataF_comp128_1*a - c*rxdataF_comp128_0
+    //printf("layer_1 \n");
+    mmtmpD0 = nr_ulsch_comp_muli_sum(rxdataF_comp128_0[0],
+                               after_mf_d_128[0],
+                               rxdataF_comp128_1[0],
+                               after_mf_b_128[0],
+                               determ_fin_128[0]);
+
+    //printf("layer_2 \n");
+    mmtmpD1 = nr_ulsch_comp_muli_sum(rxdataF_comp128_1[0],
+                               after_mf_a_128[0],
+                               rxdataF_comp128_0[0],
+                               after_mf_c_128[0],
+                               determ_fin_128[0]);
+
+    rxdataF_comp128_0[0] = mmtmpD0;
+    rxdataF_comp128_1[0] = mmtmpD1;
+#ifdef DEBUG_DLSCH_DEMOD
+    printf("\n Rx signal after ZF l%d rb%d\n",symbol,rb);
+    print_shorts(" Rx layer 1:",(int16_t*)&rxdataF_comp128_0[0]);
+    print_shorts(" Rx layer 2:",(int16_t*)&rxdataF_comp128_1[0]);
+#endif
+    determ_fin_128 += 1;
+    ul_ch_mag128_0 += 1;
+    ul_ch_mag128b_0 += 1;    
+    rxdataF_comp128_0 += 1;
+    rxdataF_comp128_1 += 1;
+    after_mf_a_128 += 1;
+    after_mf_b_128 += 1;
+    after_mf_c_128 += 1;
+    after_mf_d_128 += 1;
+  }
+  _mm_empty();
+  _m_empty();
+   return(0);
+}
+
+//==============================================================================================
+
+/* Main Function */
 int nr_rx_pusch(PHY_VARS_gNB *gNB,
                 uint8_t ulsch_id,
                 uint32_t frame,
@@ -1184,21 +1969,29 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
   gNB->pusch_vars[ulsch_id]->cl_done = 0;
 
   bwp_start_subcarrier = ((rel15_ul->rb_start + rel15_ul->bwp_start)*NR_NB_SC_PER_RB + frame_parms->first_carrier_offset) % frame_parms->ofdm_symbol_size;
-  LOG_D(PHY,"pusch %d.%d : bwp_start_subcarrier %d, rb_start %d, first_carrier_offset %d\n", frame,slot,bwp_start_subcarrier, rel15_ul->rb_start, frame_parms->first_carrier_offset);
-  LOG_D(PHY,"pusch %d.%d : ul_dmrs_symb_pos %x\n",frame,slot,rel15_ul->ul_dmrs_symb_pos);
-
+  LOG_I(PHY,"pusch %d.%d : bwp_start_subcarrier %d, rb_start %d, first_carrier_offset %d\n", frame,slot,bwp_start_subcarrier, rel15_ul->rb_start, frame_parms->first_carrier_offset);
+  LOG_I(PHY,"pusch %d.%d : ul_dmrs_symb_pos %x\n",frame,slot,rel15_ul->ul_dmrs_symb_pos);
+  LOG_I(PHY,"ulsch RX %x : start_rb %d nb_rb %d mcs %d Nl %d Tpmi %d bwp_start %d start_sc %d start_symbol %d num_symbols %d cdmgrpsnodata %d num_dmrs %d dmrs_ports %d\n",
+          rel15_ul->rnti,rel15_ul->rb_start,rel15_ul->rb_size,rel15_ul->mcs_index,
+          rel15_ul->nrOfLayers,0,rel15_ul->bwp_start,0,rel15_ul->start_symbol_index,rel15_ul->nr_of_symbols,
+          rel15_ul->num_dmrs_cdm_grps_no_data,rel15_ul->ul_dmrs_symb_pos,rel15_ul->dmrs_ports);
   //----------------------------------------------------------
   //--------------------- Channel estimation ---------------------
   //----------------------------------------------------------
   start_meas(&gNB->ulsch_channel_estimation_stats);
-  for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) {
+  for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) 
+  {
     uint8_t dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
     LOG_D(PHY, "symbol %d, dmrs_symbol_flag :%d\n", symbol, dmrs_symbol_flag);
-    if (dmrs_symbol_flag == 1) {
+    
+    if (dmrs_symbol_flag == 1) 
+    {
       if (gNB->pusch_vars[ulsch_id]->dmrs_symbol == INVALID_VALUE)
         gNB->pusch_vars[ulsch_id]->dmrs_symbol = symbol;
 
       for (int nl=0; nl<rel15_ul->nrOfLayers; nl++)
+      {
+        
         nr_pusch_channel_estimation(gNB,
                                     slot,
                                     get_dmrs_port(nl,rel15_ul->dmrs_ports),
@@ -1206,6 +1999,7 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
                                     ulsch_id,
                                     bwp_start_subcarrier,
                                     rel15_ul);
+      }
 
       nr_gnb_measurements(gNB, ulsch_id, harq_pid, symbol,rel15_ul->nrOfLayers);
 
@@ -1214,9 +2008,12 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
           gNB->pusch_vars[ulsch_id]->ulsch_power[aarx] = 0;
           gNB->pusch_vars[ulsch_id]->ulsch_noise_power[aarx] = 0;
         }
-        gNB->pusch_vars[ulsch_id]->ulsch_power[aarx] += signal_energy_nodc(
-            &gNB->pusch_vars[ulsch_id]->ul_ch_estimates[aarx][symbol * frame_parms->ofdm_symbol_size],
+        for (aatx = 0; aatx < rel15_ul->nrOfLayers; aatx++)
+        { 
+          gNB->pusch_vars[ulsch_id]->ulsch_power[aarx] += signal_energy_nodc(
+            &gNB->pusch_vars[ulsch_id]->ul_ch_estimates[aatx*gNB->frame_parms.nb_antennas_rx+aarx][symbol * frame_parms->ofdm_symbol_size],
             rel15_ul->rb_size * 12);
+        }
         for (int rb = 0; rb < rel15_ul->rb_size; rb++) {
           gNB->pusch_vars[ulsch_id]->ulsch_noise_power[aarx] +=
               gNB->measurements.n0_subband_power[aarx][rel15_ul->bwp_start + rel15_ul->rb_start + rb] /
@@ -1234,23 +2031,29 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
 #endif
   uint32_t rxdataF_ext_offset = 0;
 
-  for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) {
+  for(uint8_t symbol = rel15_ul->start_symbol_index; symbol < (rel15_ul->start_symbol_index + rel15_ul->nr_of_symbols); symbol++) 
+  {
     uint8_t dmrs_symbol_flag = (rel15_ul->ul_dmrs_symb_pos >> symbol) & 0x01;
-    if (dmrs_symbol_flag == 1) {
+    if (dmrs_symbol_flag == 1) 
+    {
       if ((rel15_ul->ul_dmrs_symb_pos >> ((symbol + 1) % frame_parms->symbols_per_slot)) & 0x01)
         AssertFatal(1==0,"Double DMRS configuration is not yet supported\n");
 
       gNB->pusch_vars[ulsch_id]->dmrs_symbol = symbol;
 
-      if (rel15_ul->dmrs_config_type == 0) {
+      if (rel15_ul->dmrs_config_type == 0) 
+      {
         // if no data in dmrs cdm group is 1 only even REs have no data
         // if no data in dmrs cdm group is 2 both odd and even REs have no data
         nb_re_pusch = rel15_ul->rb_size *(12 - (rel15_ul->num_dmrs_cdm_grps_no_data*6));
       }
-      else {
+      else 
+      {
         nb_re_pusch = rel15_ul->rb_size *(12 - (rel15_ul->num_dmrs_cdm_grps_no_data*4));
       }
-    } else {
+    } 
+    else 
+    {
       nb_re_pusch = rel15_ul->rb_size * NR_NB_SC_PER_RB;
     }
 
@@ -1260,16 +2063,16 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
     //----------------------------------------------------------
     //--------------------- RBs extraction ---------------------
     //----------------------------------------------------------
-    if (nb_re_pusch > 0) {
-
+    if (nb_re_pusch > 0) 
+    {
       start_meas(&gNB->ulsch_rbs_extraction_stats);
-      nr_ulsch_extract_rbs_single(gNB->common_vars.rxdataF,
-                                  gNB->pusch_vars[ulsch_id],
-                                  slot,
-                                  symbol,
-                                  dmrs_symbol_flag,
-                                  rel15_ul,
-                                  frame_parms);
+      nr_ulsch_extract_rbs(gNB->common_vars.rxdataF,
+                           gNB->pusch_vars[ulsch_id],
+                           slot,
+                           symbol,
+                           dmrs_symbol_flag,
+                           rel15_ul,
+                           frame_parms);
       stop_meas(&gNB->ulsch_rbs_extraction_stats);
 
       //----------------------------------------------------------
@@ -1280,8 +2083,9 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
                             gNB->ulsch[ulsch_id],
                             symbol,
                             dmrs_symbol_flag,
-                            rel15_ul->rb_size,
-                            rel15_ul->dmrs_config_type);
+                            nb_re_pusch,
+                            rel15_ul->nrOfLayers,
+                            rel15_ul->rb_size);
 
       if (gNB->pusch_vars[ulsch_id]->cl_done==0) {
         nr_ulsch_channel_level(gNB->pusch_vars[ulsch_id]->ul_ch_estimates_ext,
@@ -1312,9 +2116,14 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
                                     gNB->pusch_vars[ulsch_id]->ul_ch_mag0,
                                     gNB->pusch_vars[ulsch_id]->ul_ch_magb0,
                                     gNB->pusch_vars[ulsch_id]->rxdataF_comp,
+                                    #ifdef SUPPORT_PMI_MATRIC
                                     (rel15_ul->nrOfLayers>1) ? gNB->pusch_vars[ulsch_id]->rho : NULL,
+                                    #else
+                                    NULL,
+                                    #endif
                                     frame_parms,
                                     symbol,
+                                    nb_re_pusch,
                                     dmrs_symbol_flag,
                                     rel15_ul->qam_mod_order,
                                     rel15_ul->nrOfLayers,
@@ -1327,19 +2136,37 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
                              gNB->pusch_vars[ulsch_id]->rxdataF_comp,
                              gNB->pusch_vars[ulsch_id]->ul_ch_mag0,
                              gNB->pusch_vars[ulsch_id]->ul_ch_magb0,
+                             #ifdef SUPPORT_PMI_MATRIC
+                             (rel15_ul->nrOfLayers>1) ? gNB->pusch_vars[ulsch_id]->rho : NULL,
+                             #else
+                             NULL,
+                             #endif
+                             rel15_ul->nrOfLayers,
                              symbol,
-                             rel15_ul->rb_size);
+                             rel15_ul->rb_size,
+                             nb_re_pusch);
+                 
+      if (rel15_ul->nrOfLayers == 2)//Apply zero forcing for 2 Tx layers
+        nr_ulsch_zero_forcing_rx_2layers(gNB->pusch_vars[ulsch_id]->rxdataF_comp,
+                                   gNB->pusch_vars[ulsch_id]->ul_ch_mag0,
+                                   gNB->pusch_vars[ulsch_id]->ul_ch_magb0,                                   
+                                   gNB->pusch_vars[ulsch_id]->ul_ch_estimates_ext,
+                                   rel15_ul->rb_size,
+                                   frame_parms->nb_antennas_rx,
+                                   rel15_ul->qam_mod_order,
+                                   gNB->pusch_vars[ulsch_id]->log2_maxh,
+                                   symbol,
+                                   nb_re_pusch);
       stop_meas(&gNB->ulsch_mrc_stats);
 
-      // transform precoding = 0 means enabled
-      if (rel15_ul->transform_precoding == 0) {
-
-      #ifdef __AVX2__
+      if (rel15_ul->transformPrecoder == transformPrecoder_enabled)      
+      {
+         #ifdef __AVX2__
         // For odd number of resource blocks need byte alignment to multiple of 8
         int nb_re_pusch2 = nb_re_pusch + (nb_re_pusch&7);
-      #else
+        #else
         int nb_re_pusch2 = nb_re_pusch;
-      #endif
+        #endif
 
         // perform IDFT operation on the compensated rxdata if transform precoding is enabled
         nr_idft(&gNB->pusch_vars[ulsch_id]->rxdataF_comp[0][symbol * nb_re_pusch2], nb_re_pusch);
@@ -1351,7 +2178,8 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
       //----------------------------------------------------------
       /* In case PTRS is enabled then LLR will be calculated after PTRS symbols are processed *
       * otherwise LLR are calculated for each symbol based upon DMRS channel estimates only. */
-      if (rel15_ul->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
+      if (rel15_ul->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) 
+      {
         start_meas(&gNB->ulsch_ptrs_processing_stats);
         nr_pusch_ptrs_processing(gNB,
                                  frame_parms,
@@ -1370,14 +2198,17 @@ int nr_rx_pusch(PHY_VARS_gNB *gNB,
       /*--------------------  LLRs computation  -------------------------------------------------------------*/
       /*-----------------------------------------------------------------------------------------------------*/
       start_meas(&gNB->ulsch_llr_stats);
-      nr_ulsch_compute_llr(&gNB->pusch_vars[ulsch_id]->rxdataF_comp[0][symbol * (off + rel15_ul->rb_size * NR_NB_SC_PER_RB)],
-                           gNB->pusch_vars[ulsch_id]->ul_ch_mag0,
-                           gNB->pusch_vars[ulsch_id]->ul_ch_magb0,
-                           &gNB->pusch_vars[ulsch_id]->llr[rxdataF_ext_offset * rel15_ul->qam_mod_order],
-                           rel15_ul->rb_size,
-                           gNB->pusch_vars[ulsch_id]->ul_valid_re_per_slot[symbol],
-                           symbol,
-                           rel15_ul->qam_mod_order);
+      for (aatx=0; aatx < rel15_ul->nrOfLayers; aatx++)
+      {
+        nr_ulsch_compute_llr(&gNB->pusch_vars[ulsch_id]->rxdataF_comp[aatx*frame_parms->nb_antennas_rx][symbol * (off + rel15_ul->rb_size * NR_NB_SC_PER_RB)],
+                             gNB->pusch_vars[ulsch_id]->ul_ch_mag0[aatx*frame_parms->nb_antennas_rx],
+                             gNB->pusch_vars[ulsch_id]->ul_ch_magb0[aatx*frame_parms->nb_antennas_rx],
+                             &gNB->pusch_vars[ulsch_id]->llr_layers[aatx][rxdataF_ext_offset * rel15_ul->qam_mod_order],
+                             rel15_ul->rb_size,
+                             gNB->pusch_vars[ulsch_id]->ul_valid_re_per_slot[symbol],
+                             symbol,
+                             rel15_ul->qam_mod_order);
+      }
       stop_meas(&gNB->ulsch_llr_stats);
       rxdataF_ext_offset += gNB->pusch_vars[ulsch_id]->ul_valid_re_per_slot[symbol];
     }
diff --git a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
index bf96a761060..ad41ebf3d6d 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
+++ b/openair1/PHY/NR_TRANSPORT/nr_ulsch_llr_computation.c
@@ -64,7 +64,7 @@ void nr_ulsch_qpsk_llr(int32_t *rxdataF_comp,
 //----------------------------------------------------------------------------------------------
 
 void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
-                        int32_t **ul_ch_mag,
+                        int32_t *ul_ch_mag,
                         int16_t  *ulsch_llr,
                         uint32_t nb_rb,
                         uint32_t nb_re,
@@ -110,12 +110,12 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
 
 #if defined(__x86_64__) || defined(__i386__)
 #ifdef __AVX2__
-    ch_mag = (__m256i*)&ul_ch_mag[0][(symbol*(off+(nb_rb*12)))];
+    ch_mag = (__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
 #else
-    ch_mag = (__m128i*)&ul_ch_mag[0][(symbol*(off+(nb_rb*12)))];
+    ch_mag = (__m128i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
 #endif
 #elif defined(__arm__)
-  ch_mag = (int16x8_t*)&ul_ch_mag[0][(symbol*nb_rb*12)];
+  ch_mag = (int16x8_t*)&ul_ch_mag[(symbol*nb_rb*12)];
 #endif
 
 #ifdef __AVX2__
@@ -231,8 +231,8 @@ void nr_ulsch_16qam_llr(int32_t *rxdataF_comp,
 //----------------------------------------------------------------------------------------------
 
 void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
-                        int32_t **ul_ch_mag,
-                        int32_t **ul_ch_magb,
+                        int32_t *ul_ch_mag,
+                        int32_t *ul_ch_magb,
                         int16_t  *ulsch_llr,
                         uint32_t nb_rb,
                         uint32_t nb_re,
@@ -265,15 +265,15 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
 
 #if defined(__x86_64__) || defined(__i386__)
 #ifdef __AVX2__
-  ch_mag = (__m256i*)&ul_ch_mag[0][(symbol*(off+(nb_rb*12)))];
-  ch_magb = (__m256i*)&ul_ch_magb[0][(symbol*(off+(nb_rb*12)))];
+  ch_mag = (__m256i*)&ul_ch_mag[(symbol*(off+(nb_rb*12)))];
+  ch_magb = (__m256i*)&ul_ch_magb[(symbol*(off+(nb_rb*12)))];
 #else
-  ch_mag = (__m128i*)&ul_ch_mag[0][(symbol*nb_rb*12)];
-  ch_magb = (__m128i*)&ul_ch_magb[0][(symbol*nb_rb*12)];
+  ch_mag = (__m128i*)&ul_ch_mag[(symbol*nb_rb*12)];
+  ch_magb = (__m128i*)&ul_ch_magb[(symbol*nb_rb*12)];
 #endif
 #elif defined(__arm__)
-  ch_mag = (int16x8_t*)&ul_ch_mag[0][(symbol*nb_rb*12)];
-  ch_magb = (int16x8_t*)&ul_ch_magb[0][(symbol*nb_rb*12)];
+  ch_mag = (int16x8_t*)&ul_ch_mag[(symbol*nb_rb*12)];
+  ch_magb = (int16x8_t*)&ul_ch_magb[(symbol*nb_rb*12)];
 #endif
 
 #ifdef __AVX2__
@@ -471,8 +471,8 @@ void nr_ulsch_64qam_llr(int32_t *rxdataF_comp,
 
 
 void nr_ulsch_compute_llr(int32_t *rxdataF_comp,
-                          int32_t **ul_ch_mag,
-                          int32_t **ul_ch_magb,
+                          int32_t *ul_ch_mag,
+                          int32_t *ul_ch_magb,
                           int16_t *ulsch_llr,
                           uint32_t nb_rb,
                           uint32_t nb_re,
diff --git a/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c b/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
index 65ae2882520..2cb7609d622 100644
--- a/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
+++ b/openair1/PHY/NR_UE_ESTIMATION/filt16a_32.c
@@ -148,7 +148,7 @@ short filt8_dcr0_h[8]= {
 0,4096,8192,12288,16384,0,0,0};
 
 short filt8_l1[8] = {
-24576,16384,0,0,0,0,0,0};
+24576,16384,8192,0,0,0,0,0};
 
 short filt8_ml1[8] = {
 -8192,0,8192,16384,8192,0,0,0};
@@ -163,10 +163,10 @@ short filt8_mm1[8]= {
 0,0,0,0,8192,16384,8192,0};
 
 short filt8_dcl1[8]= {
-0,0,0,16384,12288,8192,4096,0};
+0,0,16384,12288,8192,4096,0,0};
 
 short filt8_dcr1[8]= {
-0,0,0,0,4096,8192,12288,16384};
+0,0,0,4096,8192,12288,16384,0};
 
 short filt8_dcl1_h[8]= {
 0,16384,12288,8192,4096,0,0,0};
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c
index 945dc516737..a293eec6d8d 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_coding.c
@@ -258,6 +258,15 @@ int nr_ulsch_encoding(PHY_VARS_NR_UE *ue,
   Ilbrm = 0;
   Tbslbrm = 950984; //max tbs
   Coderate = 0.0;
+  
+  #if 0
+  harq_process->a[0] = 0x31;
+  for (int i = 1; i < harq_process->pusch_pdu.pusch_data.tb_size; i++) 
+  {
+    harq_process->a[i] = (i&0xff);
+  }
+  #endif
+  
   trace_NRpdu(DIRECTION_UPLINK, harq_process->a, harq_process->pusch_pdu.pusch_data.tb_size, 0, WS_C_RNTI, 0, 0, 0,0, 0);
 ///////////
 /////////////////////////////////////////////////////////////////////////////////////////  
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
index 99dc2b658e7..5d76e654ba1 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_ulsch_ue.c
@@ -98,13 +98,12 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
                                uint8_t thread_id,
                                int gNB_id) {
 
-  LOG_D(PHY,"nr_ue_ulsch_procedures hard_id %d %d.%d\n",harq_pid,frame,slot);
+  LOG_I(PHY,"nr_ue_ulsch_procedures hard_id %d %d.%d\n",harq_pid,frame,slot);
 
   uint32_t available_bits;
   uint8_t cwd_index, l;
   uint32_t scrambled_output[NR_MAX_NB_CODEWORDS][NR_MAX_PDSCH_ENCODED_LENGTH>>5];
-  int16_t **tx_layers;
-  int32_t **txdataF;
+  	  
   int8_t Wf[2], Wt[2], l_prime[2], delta;
   uint8_t nb_dmrs_re_per_rb;
   int ap, i;
@@ -112,6 +111,9 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
 
   NR_DL_FRAME_PARMS *frame_parms = &UE->frame_parms;
   NR_UE_PUSCH *pusch_ue = UE->pusch_vars[thread_id][gNB_id];
+  int16_t **tx_layers = (int16_t **)pusch_ue->txdataF_layers;
+  int16_t **tx_precoding = (int16_t **)pusch_ue->txdataF_precoding;
+  int32_t **txdataF = UE->common_vars.txdataF;
 
   uint8_t  num_of_codewords = 1; // tmp assumption
   int      Nid_cell = 0;
@@ -148,9 +150,9 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
 
     nb_dmrs_re_per_rb = ((dmrs_type == pusch_dmrs_type1) ? 6:4)*cdm_grps_no_data;
 
-    LOG_D(PHY,"ulsch %x : start_rb %d bwp_start %d start_sc %d start_symbol %d num_symbols %d cdmgrpsnodata %d num_dmrs %d dmrs_re_per_rb %d\n",
-          rnti,start_rb,pusch_pdu->bwp_start,start_sc,start_symbol,number_of_symbols,cdm_grps_no_data,number_dmrs_symbols,nb_dmrs_re_per_rb);
-
+    LOG_I(PHY,"ulsch TX %x : start_rb %d nb_rb %d mod_order %d Nl %d Tpmi %d bwp_start %d start_sc %d start_symbol %d num_symbols %d cdmgrpsnodata %d num_dmrs %d dmrs_re_per_rb %d\n",
+          rnti,start_rb,nb_rb,mod_order,Nl,pusch_pdu->Tpmi,pusch_pdu->bwp_start,start_sc,start_symbol,number_of_symbols,cdm_grps_no_data,number_dmrs_symbols,nb_dmrs_re_per_rb);
+    
     // TbD num_of_mod_symbols is set but never used
     N_RE_prime = NR_NB_SC_PER_RB*number_of_symbols - nb_dmrs_re_per_rb*number_dmrs_symbols - N_PRB_oh;
     harq_process_ul_ue->num_of_mod_symbols = N_RE_prime*nb_rb*num_of_codewords;
@@ -236,8 +238,6 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
   /////////////////////////ULSCH layer mapping/////////////////////////
   ///////////
 
-  tx_layers = (int16_t **)pusch_ue->txdataF_layers;
-
   nr_ue_layer_mapping(UE->ulsch[thread_id][gNB_id],
                       Nl,
                       available_bits/mod_order,
@@ -257,7 +257,7 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
   int16_t *dmrs_seq = NULL;
 
   // if  transform precoding is enbaled (value 0)
-  if (pusch_pdu->transform_precoding == 0) {
+  if (pusch_pdu->transformPrecoder == transformPrecoder_enabled) {
 
     uint32_t nb_re_pusch=nb_rb * NR_NB_SC_PER_RB;
     uint32_t y_offset = 0;
@@ -315,10 +315,7 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
       LOG_M("UE_DMRS_SEQ.m","UE_DMRS_SEQ", dmrs_seq,nb_re_pusch,1,1);
     #endif
 
-  }
-  else
-    memcpy(ulsch_ue->y, tx_layers[0], (available_bits/mod_order)*sizeof(int32_t));
-  
+  }  
 
   ///////////
   ////////////////////////////////////////////////////////////////////////
@@ -328,145 +325,233 @@ void nr_ue_ulsch_procedures(PHY_VARS_NR_UE *UE,
   /////////////////////////ULSCH RE mapping/////////////////////////
   ///////////
 
-  txdataF = UE->common_vars.txdataF;
-
-  for (ap=0; ap< Nl; ap++) {
-
-    uint8_t k_prime = 0;
-    uint16_t m = 0;
-
-    
-    #ifdef DEBUG_PUSCH_MAPPING
+    for (ap=0; ap< Nl; ap++) 
+    {      
+      uint8_t k_prime = 0;
+      uint16_t m = 0;
+      
+      #ifdef DEBUG_PUSCH_MAPPING
       printf("NR_ULSCH_UE: Value of CELL ID %d /t, u %d \n", frame_parms->Nid_cell, u);
-    #endif
-
-  
-
-    // DMRS params for this ap
-    get_Wt(Wt, ap, dmrs_type);
-    get_Wf(Wf, ap, dmrs_type);
-    delta = get_delta(ap, dmrs_type);
-
-    for (l=start_symbol; l<start_symbol+number_of_symbols; l++) {
-
-      uint16_t k = start_sc;
-      uint16_t n = 0;
-      uint8_t is_dmrs_sym = 0;
-      uint8_t is_ptrs_sym = 0;
-      uint16_t dmrs_idx = 0, ptrs_idx = 0;
-
-      if ((ul_dmrs_symb_pos >> l) & 0x01) {
-        is_dmrs_sym = 1;
-
-        // transform precoding disabled (value 1)
-        if (pusch_pdu->transform_precoding == 1){
-        
-          if (dmrs_type == pusch_dmrs_type1)
-            dmrs_idx = (pusch_pdu->bwp_start + start_rb)*6;
-          else
-            dmrs_idx = (pusch_pdu->bwp_start + start_rb)*4;
-
-          // TODO: performance improvement, we can skip the modulation of DMRS symbols outside the bandwidth part
-          // Perform this on gold sequence, not required when SC FDMA operation is done,
-	        LOG_D(PHY,"DMRS in symbol %d\n",l);
-          nr_modulation(pusch_dmrs[l][0], n_dmrs*2, DMRS_MOD_ORDER, mod_dmrs); // currently only codeword 0 is modulated. Qm = 2 as DMRS is QPSK modulated
-        
-        } else {
-          dmrs_idx = 0;
-        }
-       
-       
-      } else if (pusch_pdu->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
-
-        AssertFatal(pusch_pdu->transform_precoding == 1, "PTRS NOT SUPPORTED IF TRANSFORM PRECODING IS ENABLED\n");
+      #endif
 
-        if(is_ptrs_symbol(l, ulsch_ue->ptrs_symbols)) {
-          is_ptrs_sym = 1;
-          nr_modulation(pusch_dmrs[l][0], nb_rb, DMRS_MOD_ORDER, mod_ptrs);
+      // DMRS params for this ap
+      get_Wt(Wt, ap, dmrs_type);
+      get_Wf(Wf, ap, dmrs_type);
+      delta = get_delta(ap, dmrs_type);
+
+      for (l=start_symbol; l<start_symbol+number_of_symbols; l++) 
+      {
+        uint16_t k = start_sc;
+        uint16_t n = 0;
+        uint8_t is_dmrs_sym = 0;
+        uint8_t is_ptrs_sym = 0;
+        uint16_t dmrs_idx = 0, ptrs_idx = 0;
+
+        if ((ul_dmrs_symb_pos >> l) & 0x01) 
+        {
+          is_dmrs_sym = 1;
+          if (pusch_pdu->transformPrecoder == transformPrecoder_disabled)
+          {         
+            if (dmrs_type == pusch_dmrs_type1)
+              dmrs_idx = (pusch_pdu->bwp_start + start_rb)*6;
+            else
+              dmrs_idx = (pusch_pdu->bwp_start + start_rb)*4;
+
+            // TODO: performance improvement, we can skip the modulation of DMRS symbols outside the bandwidth part
+            // Perform this on gold sequence, not required when SC FDMA operation is done,
+	          LOG_D(PHY,"DMRS in symbol %d\n",l);
+              nr_modulation(pusch_dmrs[l][0], n_dmrs*2, DMRS_MOD_ORDER, mod_dmrs); // currently only codeword 0 is modulated. Qm = 2 as DMRS is QPSK modulated        
+          }
+          else 
+          {
+            dmrs_idx = 0;
+          }   
+        } 
+        else if (pusch_pdu->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) 
+        {       
+          AssertFatal(pusch_pdu->transformPrecoder == transformPrecoder_disabled, "PTRS NOT SUPPORTED IF TRANSFORM PRECODING IS ENABLED\n"); 
+
+          if(is_ptrs_symbol(l, ulsch_ue->ptrs_symbols)) 
+          {
+            is_ptrs_sym = 1;
+            nr_modulation(pusch_dmrs[l][0], nb_rb, DMRS_MOD_ORDER, mod_ptrs);
+          }
         }
-      }
 
-      for (i=0; i< nb_rb*NR_NB_SC_PER_RB; i++) {
+        for (i=0; i< nb_rb*NR_NB_SC_PER_RB; i++) 
+        {
+          uint8_t is_dmrs = 0;
+          uint8_t is_ptrs = 0;
 
-        uint8_t is_dmrs = 0;
-        uint8_t is_ptrs = 0;
+          sample_offsetF = l*frame_parms->ofdm_symbol_size + k;
 
-        sample_offsetF = l*frame_parms->ofdm_symbol_size + k;
-
-        if (is_dmrs_sym) {
-          if (k == ((start_sc+get_dmrs_freq_idx_ul(n, k_prime, delta, dmrs_type))%frame_parms->ofdm_symbol_size))
-            is_dmrs = 1;
-        } else if (is_ptrs_sym) {
+          if (is_dmrs_sym) 
+          {
+            if (k == ((start_sc+get_dmrs_freq_idx_ul(n, k_prime, delta, dmrs_type))%frame_parms->ofdm_symbol_size))
+              is_dmrs = 1;
+          } 
+          else if (is_ptrs_sym) 
+          {
             is_ptrs = is_ptrs_subcarrier(k,
-                                         rnti,
-                                         ap,
-                                         dmrs_type,
-                                         K_ptrs,
-                                         nb_rb,
-                                         pusch_pdu->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset,
-                                         start_sc,
-                                         frame_parms->ofdm_symbol_size);
-        }
-
-        if (is_dmrs == 1) {
-          // if transform precoding is enabled
-          if (pusch_pdu->transform_precoding == 0) {
-          
-            ((int16_t*)txdataF[ap])[(sample_offsetF)<<1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*dmrs_seq[2*dmrs_idx]) >> 15;
-            ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*dmrs_seq[(2*dmrs_idx) + 1]) >> 15;
+                                        rnti,
+                                        ap,
+                                        dmrs_type,
+                                        K_ptrs,
+                                        nb_rb,
+                                        pusch_pdu->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset,
+                                        start_sc,
+                                        frame_parms->ofdm_symbol_size);
+          }
+
+          if (is_dmrs == 1) 
+          {
+            if (pusch_pdu->transformPrecoder == transformPrecoder_enabled) 
+            {          
+              ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*dmrs_seq[2*dmrs_idx]) >> 15;
+              ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*dmrs_seq[(2*dmrs_idx) + 1]) >> 15;
+            } 
+            else 
+            {
+                ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*mod_dmrs[dmrs_idx<<1]) >> 15;
+                ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*mod_dmrs[(dmrs_idx<<1) + 1]) >> 15;
+            }
+            
+            #ifdef DEBUG_PUSCH_MAPPING
+            printf("DMRS: Layer: %d\t, dmrs_idx %d\t l %d \t k %d \t k_prime %d \t n %d \t dmrs: %d %d\n",
+              ap, dmrs_idx, l, k, k_prime, n, ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1],
+              ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1]);
+            #endif
+            
+            dmrs_idx++;
+            k_prime++;
+            k_prime&=1;
+            n+=(k_prime)?0:1;
+          }  
+          else if (is_ptrs == 1) 
+          {
+            ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1] = (beta_ptrs*AMP*mod_ptrs[ptrs_idx<<1]) >> 15;
+            ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = (beta_ptrs*AMP*mod_ptrs[(ptrs_idx<<1) + 1]) >> 15;
+
+            ptrs_idx++;
+          } 
+          else if (!is_dmrs_sym || allowed_xlsch_re_in_dmrs_symbol(k, start_sc, frame_parms->ofdm_symbol_size, cdm_grps_no_data, dmrs_type)) 
+          {
+            if (pusch_pdu->transformPrecoder == transformPrecoder_disabled)
+            { 
+              ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1]       = ((int16_t *)tx_layers[ap])[m<<1];
+              ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = ((int16_t *)tx_layers[ap])[(m<<1) + 1];
+            }
+            else
+            {
+            ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1]       = ((int16_t *) ulsch_ue->y)[m<<1];
+            ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = ((int16_t *) ulsch_ue->y)[(m<<1) + 1];
+            }
+            
+            #ifdef DEBUG_PUSCH_MAPPING
+            printf("DATA: layer %d\t m %d\t l %d \t k %d \t txdataF: %d %d\n",
+            ap, m, l, k, ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1],
+            ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1]);          
+            #endif
+
+            m++;
+          } 
+          else 
+          {
+            ((int16_t*)tx_precoding[ap])[(sample_offsetF)<<1]       = 0;
+            ((int16_t*)tx_precoding[ap])[((sample_offsetF)<<1) + 1] = 0;
+          }
+
+          if (++k >= frame_parms->ofdm_symbol_size)
+          {
+            k -= frame_parms->ofdm_symbol_size;
+          }
+        } //for (i=0; i< nb_rb*NR_NB_SC_PER_RB; i++) 
+      }//for (l=start_symbol; l<start_symbol+number_of_symbols; l++)
+    }//for (ap=0; ap< Nl; ap++)
+
+    /////////////////////////ULSCH precoding/////////////////////////
+    ///////////
+    ///Layer Precoding and Antenna port mapping
+    // tx_layers 0-3 are mapped on antenna ports
+    // The precoding info is supported by nfapi such as num_prgs, prg_size, prgs_list and pm_idx
+    // The same precoding matrix is applied on prg_size RBs, Thus
+    //        pmi = prgs_list[rbidx/prg_size].pm_idx, rbidx =0,...,rbSize-1
+    // The Precoding matrix:
+    for (int ap=0; ap<frame_parms->nb_antennas_tx; ap++) 
+    {
+      for (int l=start_symbol; l<start_symbol+number_of_symbols; l++) 
+      {
+        uint16_t k = start_sc;
+
+        for (int rb=0; rb<nb_rb; rb++) 
+        {
+          //get pmi info
+          uint8_t pmi=pusch_pdu->Tpmi;
           
-          } else {
-
-              ((int16_t*)txdataF[ap])[(sample_offsetF)<<1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*mod_dmrs[dmrs_idx<<1]) >> 15;
-              ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = (Wt[l_prime[0]]*Wf[k_prime]*AMP*mod_dmrs[(dmrs_idx<<1) + 1]) >> 15;
-
+          if (pmi == 0) {//unitary Precoding
+            if(ap< pusch_pdu->nrOfLayers)
+              memcpy((void*)&txdataF[ap][l*frame_parms->ofdm_symbol_size  + k],
+                     (void*)&tx_precoding[ap][2*(l*frame_parms->ofdm_symbol_size + k)],
+                     NR_NB_SC_PER_RB*sizeof(int32_t));
+            else
+              memset((void*)&txdataF[ap][l*frame_parms->ofdm_symbol_size + k],
+                     0,
+                     NR_NB_SC_PER_RB*sizeof(int32_t));
+         
+            k += NR_NB_SC_PER_RB;
+            if (k >= frame_parms->ofdm_symbol_size) {
+              k -= frame_parms->ofdm_symbol_size;
+            }
+          }
+          else
+          {
+            //get the precoding matrix weights:
+            char *W_prec;
+            switch (frame_parms->nb_antennas_tx) 
+            {
+              case 1://1 antenna port
+                W_prec = nr_W_1l_2p[pmi][ap];
+                break;
+              case 2://2 antenna ports
+                if (pusch_pdu->nrOfLayers == 1)//1 layer
+                  W_prec = nr_W_1l_2p[pmi][ap];
+                else//2 layers
+                  W_prec = nr_W_2l_2p[pmi][ap];
+                break;
+              case 4://4 antenna ports
+                if (pusch_pdu->nrOfLayers == 1)//1 layer
+                  W_prec = nr_W_1l_4p[pmi][ap];
+                else if (pusch_pdu->nrOfLayers == 2)//2 layers
+                  W_prec = nr_W_2l_4p[pmi][ap];
+                else if (pusch_pdu->nrOfLayers == 3)//3 layers
+                  W_prec = nr_W_3l_4p[pmi][ap];
+                else//4 layers
+                  W_prec = nr_W_4l_4p[pmi][ap];
+                break;
+              default:
+                LOG_D(PHY,"Precoding 1,2, or 4 antenna ports are currently supported\n");
+                W_prec = nr_W_1l_2p[pmi][ap];
+                break;
             }
 
-          #ifdef DEBUG_PUSCH_MAPPING
-            printf("dmrs_idx %d\t l %d \t k %d \t k_prime %d \t n %d \t dmrs: %d %d\n",
-            dmrs_idx, l, k, k_prime, n, ((int16_t*)txdataF[ap])[(sample_offsetF)<<1],
-            ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1]);
-          #endif
-
-
-          dmrs_idx++;
-          k_prime++;
-          k_prime&=1;
-          n+=(k_prime)?0:1;
-
-        }  else if (is_ptrs == 1) {
-
-          ((int16_t*)txdataF[ap])[(sample_offsetF)<<1] = (beta_ptrs*AMP*mod_ptrs[ptrs_idx<<1]) >> 15;
-          ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = (beta_ptrs*AMP*mod_ptrs[(ptrs_idx<<1) + 1]) >> 15;
-
-          ptrs_idx++;
-
-        } else if (!is_dmrs_sym || allowed_xlsch_re_in_dmrs_symbol(k, start_sc, frame_parms->ofdm_symbol_size, cdm_grps_no_data, dmrs_type)) {
-
-          ((int16_t*)txdataF[ap])[(sample_offsetF)<<1]       = ((int16_t *) ulsch_ue->y)[m<<1];
-          ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = ((int16_t *) ulsch_ue->y)[(m<<1) + 1];
-
-        #ifdef DEBUG_PUSCH_MAPPING
-          printf("m %d\t l %d \t k %d \t txdataF: %d %d\n",
-          m, l, k, ((int16_t*)txdataF[ap])[(sample_offsetF)<<1],
-          ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1]);
-        #endif
-
-          m++;
-
-        } else {
-
-          ((int16_t*)txdataF[ap])[(sample_offsetF)<<1]       = 0;
-          ((int16_t*)txdataF[ap])[((sample_offsetF)<<1) + 1] = 0;
-
-        }
+            for (int i=0; i<NR_NB_SC_PER_RB; i++) 
+            {
+              int32_t re_offset = l*frame_parms->ofdm_symbol_size + k;
+              int32_t precodatatx_F = nr_layer_precoder(tx_precoding, W_prec, pusch_pdu->nrOfLayers, re_offset);
+              ((int16_t*)txdataF[ap])[(re_offset<<1)] = ((int16_t *) &precodatatx_F)[0];
+              ((int16_t*)txdataF[ap])[(re_offset<<1) + 1] = ((int16_t *) &precodatatx_F)[1];
+                            
+              if (++k >= frame_parms->ofdm_symbol_size) 
+              {
+                k -= frame_parms->ofdm_symbol_size;
+              }
+            }
+          }
+        } //RB loop
+      } // symbol loop
+    }// port loop
 
-        if (++k >= frame_parms->ofdm_symbol_size)
-          k -= frame_parms->ofdm_symbol_size;
-      }
-    }
-  }
-  }
+  } //for (cwd_index = 0;cwd_index < num_of_codewords; cwd_index++)
 
   NR_UL_UE_HARQ_t *harq_process_ulsch=NULL;
   harq_process_ulsch = UE->ulsch[thread_id][gNB_id][0]->harq_processes[harq_pid];
diff --git a/openair1/PHY/defs_gNB.h b/openair1/PHY/defs_gNB.h
index 11b5112a6cb..600ba07cb35 100644
--- a/openair1/PHY/defs_gNB.h
+++ b/openair1/PHY/defs_gNB.h
@@ -479,7 +479,7 @@ typedef struct {
   /// \brief Cross-correlation of two UE signals.
   /// - first index: rx antenna [0..nb_antennas_rx[
   /// - second index: symbol [0..]
-  int32_t **rho;
+  int32_t ***rho;
   /// \f$\log_2(\max|H_i|^2)\f$
   int16_t log2_maxh;
   /// \brief Magnitude of Uplink Channel first layer (16QAM level/First 64QAM level).
@@ -509,6 +509,10 @@ typedef struct {
   /// \brief llr values.
   /// - first index: ? [0..1179743] (hard coded)
   int16_t *llr;
+  /// \brief llr values per layer.
+  /// - first index: ? [0..3] (hard coded)
+  /// - first index: ? [0..1179743] (hard coded)
+  int16_t **llr_layers;
   /// DMRS symbol index, to be updated every DMRS symbol within a slot.
   uint8_t dmrs_symbol;
   // PTRS symbol index, to be updated every PTRS symbol within a slot.
diff --git a/openair1/PHY/defs_nr_UE.h b/openair1/PHY/defs_nr_UE.h
index 69978f55482..02988b590a0 100644
--- a/openair1/PHY/defs_nr_UE.h
+++ b/openair1/PHY/defs_nr_UE.h
@@ -229,6 +229,9 @@ typedef struct {
 typedef struct {
   /// TX buffers for multiple layers
   int32_t *txdataF_layers[NR_MAX_NB_LAYERS];
+
+  /// TX buffers for UE-spec transmission
+  int32_t *txdataF_precoding[NR_MAX_NB_LAYERS];
   } NR_UE_PUSCH;
 
 typedef struct {
diff --git a/openair1/PHY/defs_nr_common.h b/openair1/PHY/defs_nr_common.h
index c385d323e3b..fbe094f41c4 100644
--- a/openair1/PHY/defs_nr_common.h
+++ b/openair1/PHY/defs_nr_common.h
@@ -93,7 +93,7 @@
 #define NR_MAX_CSET_DURATION 3
 
 #define NR_MAX_NB_RBG 18
-#define NR_MAX_NB_LAYERS 2 // 8 // SU-MIMO (3GPP TS 38.211 V15.4.0 section 7.3.1.3)
+#define NR_MAX_NB_LAYERS 4 // 8 // SU-MIMO (3GPP TS 38.211 V15.4.0 section 7.3.1.3)
 #define NR_MAX_NB_CODEWORDS 2
 #define NR_MAX_NB_HARQ_PROCESSES 16
 #define NR_MAX_PDSCH_ENCODED_LENGTH (NR_MAX_NB_RB*NR_SYMBOLS_PER_SLOT*NR_NB_SC_PER_RB*8*NR_MAX_NB_LAYERS) // 8 is the maximum modulation order (it was 950984 before !!)
@@ -104,11 +104,11 @@
 #define MAX_NUM_NR_DLSCH_SEGMENTS (NR_MAX_NB_LAYERS*34)
 #define MAX_NR_DLSCH_PAYLOAD_BYTES (MAX_NUM_NR_DLSCH_SEGMENTS*1056)
 
-#define MAX_NUM_NR_ULSCH_SEGMENTS 34
+#define MAX_NUM_NR_ULSCH_SEGMENTS (NR_MAX_NB_LAYERS*34)
 #define MAX_NR_ULSCH_PAYLOAD_BYTES (MAX_NUM_NR_ULSCH_SEGMENTS*1056)
 
-#define MAX_NUM_NR_CHANNEL_BITS (14*273*12*8)  // 14 symbols, 273 RB
-#define MAX_NUM_NR_RE (14*273*12)
+#define MAX_NUM_NR_CHANNEL_BITS (4*14*273*12*8)  // 14 symbols, 273 RB
+#define MAX_NUM_NR_RE (4*14*273*12)
 #define NR_RX_NB_TH 1
 #define NR_NB_TH_SLOT 2
 
diff --git a/openair1/SCHED_NR/nr_ru_procedures.c b/openair1/SCHED_NR/nr_ru_procedures.c
index d8fa11d1571..640a0740ef4 100644
--- a/openair1/SCHED_NR/nr_ru_procedures.c
+++ b/openair1/SCHED_NR/nr_ru_procedures.c
@@ -526,7 +526,8 @@ void nr_fep0(RU_t *ru, int first_half) {
     end_symbol = NR_SYMBOLS_PER_SLOT;
   }
 
-  LOG_D(PHY,"In fep0 for slot = %d, first_half = %d, start_symbol = %d, end_symbol = %d\n", proc->tti_rx, first_half, start_symbol, end_symbol);
+  LOG_D(PHY,"In fep0 for slot = %d, first_half = %d, start_symbol = %d, end_symbol = %d, nb_antennas_rx = %d,N_TA_offset = %d\n", 
+        proc->tti_rx, first_half, start_symbol, end_symbol,fp->nb_antennas_rx,ru->N_TA_offset);
   //  printf("fep0: slot %d\n",slot);
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_RU_FEPRX+proc->tti_rx, 1);
diff --git a/openair1/SCHED_NR/phy_procedures_nr_gNB.c b/openair1/SCHED_NR/phy_procedures_nr_gNB.c
index 1685e206ae4..63c708e5654 100644
--- a/openair1/SCHED_NR/phy_procedures_nr_gNB.c
+++ b/openair1/SCHED_NR/phy_procedures_nr_gNB.c
@@ -253,13 +253,13 @@ void nr_postDecode(PHY_VARS_gNB *gNB, notifiedFIFO_elt_t *req) {
   // if all segments are done
   if (rdata->nbSegments == ulsch_harq->processedSegments) {
     if (decodeSuccess) {
-      LOG_D(PHY,"[gNB %d] ULSCH: Setting ACK for SFN/SF %d.%d (pid %d, ndi %d, status %d, round %d, TBS %d, Max interation (all seg) %d)\n",
+      LOG_I(PHY,"[gNB %d] ULSCH: Setting ACK for SFN/SF %d.%d (pid %d, ndi %d, status %d, round %d, TBS %d, Max interation (all seg) %d)\n",
             gNB->Mod_id,ulsch_harq->frame,ulsch_harq->slot,rdata->harq_pid,pusch_pdu->pusch_data.new_data_indicator,ulsch_harq->status,ulsch_harq->round,ulsch_harq->TBS,rdata->decodeIterations);
       ulsch_harq->status = SCH_IDLE;
       ulsch_harq->round  = 0;
       ulsch->harq_mask &= ~(1 << rdata->harq_pid);
 
-      LOG_D(PHY, "ULSCH received ok \n");
+      LOG_I(PHY, "ULSCH received ok \n");
       nr_fill_indication(gNB,ulsch_harq->frame, ulsch_harq->slot, rdata->ulsch_id, rdata->harq_pid, 0,0);
       //dumpsig=1;
     } else {
@@ -281,7 +281,7 @@ void nr_postDecode(PHY_VARS_gNB *gNB, notifiedFIFO_elt_t *req) {
       }
       ulsch_harq->handled  = 1;
 
-      LOG_D(PHY, "ULSCH %d in error\n",rdata->ulsch_id);
+      LOG_I(PHY, "ULSCH %d in error\n",rdata->ulsch_id);
       nr_fill_indication(gNB,ulsch_harq->frame, ulsch_harq->slot, rdata->ulsch_id, rdata->harq_pid, 1,0);
     }
 /*
@@ -365,6 +365,14 @@ void nr_ulsch_procedures(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx, int ULSCH
 	number_dmrs_symbols, // number of dmrs symbols irrespective of single or double symbol dmrs
 	pusch_pdu->qam_mod_order,
 	pusch_pdu->nrOfLayers);
+
+
+  nr_ulsch_layer_demapping(gNB->pusch_vars[ULSCH_id]->llr,
+				     pusch_pdu->nrOfLayers,
+				     pusch_pdu->qam_mod_order,
+				     G,
+				     gNB->pusch_vars[ULSCH_id]->llr_layers);
+             
   //----------------------------------------------------------
   //------------------- ULSCH unscrambling -------------------
   //----------------------------------------------------------
@@ -731,7 +739,7 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
             (ulsch_harq->slot == slot_rx) &&
             (ulsch_harq->handled == 0)){
 
-          LOG_D(PHY, "PUSCH detection started in frame %d slot %d\n",
+          LOG_I(PHY, "PUSCH detection started in frame %d slot %d\n",
                 frame_rx,slot_rx);
           int num_dmrs=0;
           for (int s=0;s<NR_NUMBER_OF_SYMBOLS_PER_SLOT; s++)
@@ -767,7 +775,7 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
 	        start_meas(&gNB->rx_pusch_stats);
           no_sig = nr_rx_pusch(gNB, ULSCH_id, frame_rx, slot_rx, harq_pid);
           if (no_sig) {
-            LOG_D(PHY, "PUSCH not detected in frame %d, slot %d\n", frame_rx, slot_rx);
+            LOG_I(PHY, "PUSCH not detected in frame %d, slot %d\n", frame_rx, slot_rx);
             nr_fill_indication(gNB, frame_rx, slot_rx, ULSCH_id, harq_pid, 1,1);
             return 1;
           }
@@ -783,7 +791,7 @@ int phy_procedures_gNB_uespec_RX(PHY_VARS_gNB *gNB, int frame_rx, int slot_rx) {
               dB_fixed_x10(gNB->pusch_vars[ULSCH_id]->ulsch_noise_power_tot) + gNB->pusch_thres) {
              NR_gNB_SCH_STATS_t *stats=get_ulsch_stats(gNB,ulsch);
 
-             LOG_D(PHY, "PUSCH not detected in %d.%d (%d,%d,%d)\n",frame_rx,slot_rx,
+             LOG_I(PHY, "PUSCH not detected in %d.%d (%d,%d,%d)\n",frame_rx,slot_rx,
                    dB_fixed_x10(gNB->pusch_vars[ULSCH_id]->ulsch_power_tot),
                    dB_fixed_x10(gNB->pusch_vars[ULSCH_id]->ulsch_noise_power_tot),gNB->pusch_thres);
              gNB->pusch_vars[ULSCH_id]->ulsch_power_tot = gNB->pusch_vars[ULSCH_id]->ulsch_noise_power_tot;
diff --git a/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c b/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
index f6e4d90beed..8e1df435c34 100644
--- a/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
+++ b/openair1/SCHED_NR_UE/fapi_nr_ue_l1.c
@@ -157,7 +157,7 @@ int8_t nr_ue_scheduled_response(nr_scheduled_response_t *scheduled_response){
       for (i = 0; i < ul_config->number_pdus; ++i){
 
         AssertFatal(ul_config->ul_config_list[i].pdu_type <= FAPI_NR_UL_CONFIG_TYPES,"pdu_type %d out of bounds\n",ul_config->ul_config_list[i].pdu_type);
-        LOG_D(PHY, "In %s: processing %s PDU of %d total UL PDUs (ul_config %p) \n", __FUNCTION__, ul_pdu_type[ul_config->ul_config_list[i].pdu_type - 1], ul_config->number_pdus, ul_config);
+        LOG_I(PHY, "In %s i %d: processing %s PDU of %d total UL PDUs (ul_config %p) \n", __FUNCTION__, i, ul_pdu_type[ul_config->ul_config_list[i].pdu_type - 1], ul_config->number_pdus, ul_config);
 
         uint8_t pdu_type = ul_config->ul_config_list[i].pdu_type, current_harq_pid, gNB_id = 0;
         /* PRACH */
@@ -180,6 +180,8 @@ int8_t nr_ue_scheduled_response(nr_scheduled_response_t *scheduled_response){
           if (harq_process_ul_ue){
 
             nfapi_nr_ue_pusch_pdu_t *pusch_pdu = &harq_process_ul_ue->pusch_pdu;
+            
+            LOG_I(PHY, "In %s i %d: copy pusch_config_pdu nrOfLayers:%d, num_dmrs_cdm_grps_no_data:%d \n", __FUNCTION__, i, pusch_config_pdu->nrOfLayers,pusch_config_pdu->num_dmrs_cdm_grps_no_data);
 
             memcpy(pusch_pdu, pusch_config_pdu, sizeof(nfapi_nr_ue_pusch_pdu_t));
 
diff --git a/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c b/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
index 940931694c9..0a0a9cc2f9b 100644
--- a/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
+++ b/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
@@ -270,9 +270,12 @@ void phy_procedures_nrUE_TX(PHY_VARS_NR_UE *ue,
   AssertFatal(ue->CC_id == 0, "Transmission on secondary CCs is not supported yet\n");
 
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_PHY_PROCEDURES_UE_TX,VCD_FUNCTION_IN);
-
-  memset(ue->common_vars.txdataF[0], 0, sizeof(int)*14*ue->frame_parms.ofdm_symbol_size);
-
+  
+  for(int i=0; i< ue->frame_parms.nb_antennas_tx; ++i)
+  {
+    memset(ue->common_vars.txdataF[i], 0, sizeof(int)*14*ue->frame_parms.ofdm_symbol_size);
+  }
+  
   LOG_D(PHY,"****** start TX-Chain for AbsSubframe %d.%d ******\n", frame_tx, slot_tx);
 
 
diff --git a/openair1/SIMULATION/NR_PHY/dlsim.c b/openair1/SIMULATION/NR_PHY/dlsim.c
index 1eb60279364..d5a10546541 100644
--- a/openair1/SIMULATION/NR_PHY/dlsim.c
+++ b/openair1/SIMULATION/NR_PHY/dlsim.c
@@ -768,7 +768,7 @@ int main(int argc, char **argv)
 
   prepare_scd(scd);
 
-  fill_default_secondaryCellGroup(scc, scd, secondaryCellGroup, 0, 1, n_tx, 0, 0, 0);
+  fill_default_secondaryCellGroup(scc, scd, secondaryCellGroup, 0, 1, n_tx, n_rx, 0, 0, 0);
 
   /* RRC parameter validation for secondaryCellGroup */
   fix_scd(scd);
diff --git a/openair1/SIMULATION/NR_PHY/ulschsim.c b/openair1/SIMULATION/NR_PHY/ulschsim.c
index 94aea36a670..fb8b4295ae9 100644
--- a/openair1/SIMULATION/NR_PHY/ulschsim.c
+++ b/openair1/SIMULATION/NR_PHY/ulschsim.c
@@ -137,6 +137,7 @@ int main(int argc, char **argv)
   uint16_t nb_symb_sch = 12;
   uint16_t nb_rb = 50;
   uint8_t Imcs = 9;
+  uint8_t Nl = 1;
 
   double DS_TDL = .03;
 
@@ -150,7 +151,7 @@ int main(int argc, char **argv)
   randominit(0);
 
   //while ((c = getopt(argc, argv, "df:hpg:i:j:n:l:m:r:s:S:y:z:M:N:F:R:P:")) != -1) {
-  while ((c = getopt(argc, argv, "hg:n:s:S:py:z:M:N:R:F:m:l:r:")) != -1) {
+  while ((c = getopt(argc, argv, "hg:n:s:S:py:z:M:N:R:F:m:l:r:W:")) != -1) {
     switch (c) {
       /*case 'f':
          write_output_file = 1;
@@ -251,7 +252,7 @@ int main(int argc, char **argv)
       case 'y':
         n_tx = atoi(optarg);
 
-        if ((n_tx == 0) || (n_tx > 2)) {
+        if ((n_tx == 0) || (n_tx > 4)) {
           printf("Unsupported number of TX antennas %d. Exiting.\n", n_tx);
           exit(-1);
         }
@@ -261,7 +262,7 @@ int main(int argc, char **argv)
       case 'z':
         n_rx = atoi(optarg);
 
-        if ((n_rx == 0) || (n_rx > 2)) {
+        if ((n_rx == 0) || (n_rx > 4)) {
           printf("Unsupported number of RX antennas %d. Exiting.\n", n_rx);
           exit(-1);
         }
@@ -299,6 +300,10 @@ int main(int argc, char **argv)
           printf("Illegal PBCH phase (0-3) got %d\n", pbch_phase);
         break;*/
 
+      case 'W':
+        Nl = atoi(optarg);
+      break;
+
       case 'm':
         Imcs = atoi(optarg);
 #ifdef DEBUG_NR_ULSCHSIM
@@ -335,13 +340,14 @@ int main(int argc, char **argv)
           printf("-z Number of RX antennas used in UE\n");
           //printf("-i Relative strength of first intefering eNB (in dB) - cell_id mod 3 = 1\n");
           //printf("-j Relative strength of second intefering eNB (in dB) - cell_id mod 3 = 2\n");
+          printf("-W number of layer\n");
           printf("-M Multiple SSB positions in burst\n");
           printf("-N Nid_cell\n");
           printf("-R N_RB_UL\n");
           printf("-F Input filename (.txt format) for RX conformance testing\n");
-          printf("-m\n");
-          printf("-l\n");
-          printf("-r\n");
+          printf("-m MCS\n");
+          printf("-l number of symbol\n");
+          printf("-r number of RB\n");
           //printf("-O oversampling factor (1,2,4,8,16)\n");
           //printf("-A Interpolation_filname Run with Abstraction to generate Scatter plot using interpolation polynomial in file\n");
           //printf("-C Generate Calibration information for Abstraction (effective SNR adjustment to remove Pe bias w.r.t. AWGN)\n");
@@ -359,8 +365,8 @@ int main(int argc, char **argv)
     snr1 = snr0 + 10;
 
   gNB2UE = new_channel_desc_scm(n_tx,
-		                n_rx,
-				channel_model,
+                                n_rx,
+                                channel_model,
                                 61.44e6, //N_RB2sampling_rate(N_RB_DL),
                                 40e6, //N_RB2channel_bandwidth(N_RB_DL),
                                 DS_TDL,
@@ -426,8 +432,7 @@ int main(int argc, char **argv)
   uint8_t length_dmrs = 1;
   uint8_t N_PRB_oh;
   uint16_t N_RE_prime,code_rate;
-  unsigned char mod_order;
-  uint8_t Nl = 1;
+  unsigned char mod_order;  
   uint8_t rvidx = 0;
   uint8_t UE_id = 0;
 
@@ -437,9 +442,14 @@ int main(int argc, char **argv)
 
   NR_UE_ULSCH_t *ulsch_ue = UE->ulsch[0][0][0];
 
+  if ((Nl==4)||(Nl==3))
+  {
+    nb_re_dmrs = nb_re_dmrs*2;
+  }
+
   mod_order = nr_get_Qm_ul(Imcs, 0);
   code_rate = nr_get_code_rate_ul(Imcs, 0);
-  available_bits = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, mod_order, 1);
+  available_bits = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, mod_order, Nl);
   TBS = nr_compute_tbs(mod_order,code_rate, nb_rb, nb_symb_sch, nb_re_dmrs*length_dmrs, 0, 0, Nl);
 
   printf("\nAvailable bits %u TBS %u mod_order %d\n", available_bits, TBS, mod_order);
diff --git a/openair1/SIMULATION/NR_PHY/ulsim.c b/openair1/SIMULATION/NR_PHY/ulsim.c
index 69316d6b884..b6cfae3fe17 100644
--- a/openair1/SIMULATION/NR_PHY/ulsim.c
+++ b/openair1/SIMULATION/NR_PHY/ulsim.c
@@ -282,7 +282,7 @@ int main(int argc, char **argv)
   int gNB_id = 0;
   int ap;
   int tx_offset;
-  int32_t txlev=0;
+  int32_t txlev_sum = 0, atxlev[4];
   int start_rb = 0;
   int UE_id =0; // [hna] only works for UE_id = 0 because NUMBER_OF_NR_UE_MAX is set to 1 (phy_init_nr_gNB causes segmentation fault)
   float target_error_rate = 0.01;
@@ -307,7 +307,7 @@ int main(int argc, char **argv)
   uint16_t ptrsSymbPerSlot = 0;
   uint16_t ptrsRePerSymb = 0;
 
-  uint8_t transform_precoding = 1; // 0 - ENABLE, 1 - DISABLE
+  uint8_t transformPrecoder = transformPrecoder_disabled; // 0 - ENABLE, 1 - DISABLE
   uint8_t num_dmrs_cdm_grps_no_data = 1;
   uint8_t mcs_table = 0;
 
@@ -329,7 +329,7 @@ int main(int argc, char **argv)
   /* initialize the sin-cos table */
    InitSinLUT();
 
-  while ((c = getopt(argc, argv, "a:b:c:d:ef:g:h:ikl:m:n:p:r:s:u:w:y:z:F:G:H:M:N:PR:S:T:U:L:Z")) != -1) {
+  while ((c = getopt(argc, argv, "a:b:c:d:ef:g:h:i:kl:m:n:p:r:s:u:w:y:z:F:G:H:M:N:PR:S:T:U:L:Z:W:")) != -1) {
     printf("handling optarg %c\n",c);
     switch (c) {
 
@@ -363,8 +363,8 @@ int main(int argc, char **argv)
       scg_fd = fopen(optarg, "r");
       
       if (scg_fd == NULL) {
-	printf("Error opening %s\n", optarg);
-	exit(-1);
+        printf("Error opening %s\n", optarg);
+        exit(-1);
       }
 
       break;
@@ -437,6 +437,10 @@ int main(int argc, char **argv)
     case 'm':
       Imcs = atoi(optarg);
       break;
+
+    case 'W':
+      precod_nbr_layers = atoi(optarg);
+      break;
       
     case 'n':
       n_trials = atoi(optarg);
@@ -485,9 +489,9 @@ int main(int argc, char **argv)
     case 'y':
       n_tx = atoi(optarg);
       
-      if ((n_tx == 0) || (n_tx > 2)) {
-	printf("Unsupported number of tx antennas %d\n", n_tx);
-	exit(-1);
+      if ((n_tx == 0) || (n_tx > 4)) {
+          printf("Unsupported number of tx antennas %d\n", n_tx);
+          exit(-1);
       }
       
       break;
@@ -496,8 +500,8 @@ int main(int argc, char **argv)
       n_rx = atoi(optarg);
       
       if ((n_rx == 0) || (n_rx > 8)) {
-	printf("Unsupported number of rx antennas %d\n", n_rx);
-	exit(-1);
+          printf("Unsupported number of rx antennas %d\n", n_rx);
+          exit(-1);
       }
       
       break;
@@ -506,8 +510,8 @@ int main(int argc, char **argv)
       input_fd = fopen(optarg, "r");
       
       if (input_fd == NULL) {
-	printf("Problem with filename %s\n", optarg);
-	exit(-1);
+          printf("Problem with filename %s\n", optarg);
+          exit(-1);
       }
       
       break;
@@ -568,11 +572,11 @@ int main(int argc, char **argv)
 
     case 'Z':
 
-      transform_precoding = 0; // enabled
+      transformPrecoder = transformPrecoder_enabled; 
       num_dmrs_cdm_grps_no_data = 2;
       mcs_table = 3;
       
-      printf("NOTE: TRANSFORM PRECODING (SC-FDMA) is ENABLED in UPLINK (0 - ENABLE, 1 - DISABLE) : %d \n",  transform_precoding);
+      printf("NOTE: TRANSFORM PRECODING (SC-FDMA) is ENABLED in UPLINK (0 - ENABLE, 1 - DISABLE) : %d \n",  transformPrecoder);
 
       break;
 
@@ -611,6 +615,7 @@ int main(int argc, char **argv)
       printf("-U Change DMRS Config, arguments list DMRS TYPE{0=A,1=B} DMRS AddPos{0:3}, e.g. -U 2 0 2 \n");
       printf("-Q If -F used, read parameters from file\n");
       printf("-Z If -Z is used, SC-FDMA or transform precoding is enabled in Uplink \n");
+	  printf("-W Num of layer for PUSCH\n");
       exit(-1);
       break;
 
@@ -713,7 +718,7 @@ int main(int argc, char **argv)
 
   prepare_scd(scd);
 
-  fill_default_secondaryCellGroup(scc, scd, secondaryCellGroup, 0, 1, n_tx, 0, 0, 0);
+  fill_default_secondaryCellGroup(scc, scd, secondaryCellGroup, 0, 1, n_tx, n_rx, 0, 0, 0);
 
   // xer_fprint(stdout, &asn_DEF_NR_CellGroupConfig, (const void*)secondaryCellGroup);
 
@@ -724,9 +729,9 @@ int main(int argc, char **argv)
 
   gNB->if_inst->NR_PHY_config_req      = nr_phy_config_request;
   // common configuration
-  rrc_mac_config_req_gNB(0,0, n_tx, n_rx, 0, scc, &rrc.carrier.mib,0, 0, NULL);
+  rrc_mac_config_req_gNB(0,0, n_tx, n_rx, n_tx, scc, &rrc.carrier.mib,0, 0, NULL);
   // UE dedicated configuration
-  rrc_mac_config_req_gNB(0,0, n_tx, n_rx, 0, scc, &rrc.carrier.mib,1, secondaryCellGroup->spCellConfig->reconfigurationWithSync->newUE_Identity,secondaryCellGroup);
+  rrc_mac_config_req_gNB(0,0, n_tx, n_rx, n_tx, scc, &rrc.carrier.mib,1, secondaryCellGroup->spCellConfig->reconfigurationWithSync->newUE_Identity,secondaryCellGroup);
   frame_parms->nb_antennas_tx = n_tx;
   frame_parms->nb_antennas_rx = n_rx;
   nfapi_nr_config_request_scf_t *cfg = &gNB->gNB_config;
@@ -870,14 +875,18 @@ int main(int argc, char **argv)
   uint16_t number_dmrs_symbols = get_dmrs_symbols_in_slot(l_prime_mask, nb_symb_sch);
   printf("num dmrs sym %d\n",number_dmrs_symbols);
   uint8_t  nb_re_dmrs          = (dmrs_config_type == pusch_dmrs_type1) ? 6 : 4;
-
-  // if transform precoding is enabled
-  if (transform_precoding == 0) {
+  
+  if ((frame_parms->nb_antennas_tx==4)&&(precod_nbr_layers==4))
+  {
+    num_dmrs_cdm_grps_no_data = 2;
+  }
+  
+  if (transformPrecoder == transformPrecoder_enabled) {  
 
     AssertFatal(enable_ptrs == 0, "PTRS NOT SUPPORTED IF TRANSFORM PRECODING IS ENABLED\n");
 
     int8_t index = get_index_for_dmrs_lowpapr_seq((NR_NB_SC_PER_RB/2) * nb_rb);
-	  AssertFatal(index >= 0, "Num RBs not configured according to 3GPP 38.211 section 6.3.1.4. For PUSCH with transform precoding, num RBs cannot be multiple of any other primenumber other than 2,3,5\n");
+	AssertFatal(index >= 0, "Num RBs not configured according to 3GPP 38.211 section 6.3.1.4. For PUSCH with transform precoding, num RBs cannot be multiple of any other primenumber other than 2,3,5\n");
     
     dmrs_config_type = pusch_dmrs_type1;
 
@@ -886,7 +895,7 @@ int main(int argc, char **argv)
 
   nb_re_dmrs   = nb_re_dmrs * num_dmrs_cdm_grps_no_data;
 
-  unsigned int available_bits  = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, number_dmrs_symbols, mod_order, 1);
+  unsigned int available_bits  = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, number_dmrs_symbols, mod_order, precod_nbr_layers);
   unsigned int TBS             = nr_compute_tbs(mod_order, code_rate, nb_rb, nb_symb_sch, nb_re_dmrs * number_dmrs_symbols, 0, 0, precod_nbr_layers);
 
   
@@ -1082,14 +1091,14 @@ int main(int argc, char **argv)
       pusch_pdu->mcs_table = mcs_table;
       pusch_pdu->target_code_rate = code_rate;
       pusch_pdu->qam_mod_order = mod_order;
-      pusch_pdu->transform_precoding = transform_precoding;
+      pusch_pdu->transformPrecoder = transformPrecoder;
       pusch_pdu->data_scrambling_id = *scc->physCellId;
-      pusch_pdu->nrOfLayers = 1;
+      pusch_pdu->nrOfLayers = precod_nbr_layers;
       pusch_pdu->ul_dmrs_symb_pos = l_prime_mask;
       pusch_pdu->dmrs_config_type = dmrs_config_type;
       pusch_pdu->ul_dmrs_scrambling_id =  *scc->physCellId;
       pusch_pdu->scid = 0;
-      pusch_pdu->dmrs_ports = 1;
+      pusch_pdu->dmrs_ports = ((1<<precod_nbr_layers)-1);      
       pusch_pdu->num_dmrs_cdm_grps_no_data = num_dmrs_cdm_grps_no_data;
       pusch_pdu->resource_alloc = 1; 
       pusch_pdu->rb_start = start_rb;
@@ -1109,7 +1118,7 @@ int main(int argc, char **argv)
       pusch_pdu->pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset = 0;
 
       // if transform precoding is enabled
-      if (transform_precoding == 0) {
+      if (transformPrecoder == transformPrecoder_enabled) { 
 
         pusch_pdu->dfts_ofdm.low_papr_group_number = *scc->physCellId % 30; // U as defined in 38.211 section 6.4.1.1.1.2 
         pusch_pdu->dfts_ofdm.low_papr_sequence_number = 0;     // V as defined in 38.211 section 6.4.1.1.1.2
@@ -1167,10 +1176,10 @@ int main(int argc, char **argv)
       ul_config.ul_config_list[0].pusch_config_pdu.pusch_ptrs.ptrs_ports_list   = (nfapi_nr_ue_ptrs_ports_t *) malloc(2*sizeof(nfapi_nr_ue_ptrs_ports_t));
       ul_config.ul_config_list[0].pusch_config_pdu.pusch_ptrs.ptrs_ports_list[0].ptrs_re_offset = 0;
 
-      ul_config.ul_config_list[0].pusch_config_pdu.transform_precoding = transform_precoding;
+      ul_config.ul_config_list[0].pusch_config_pdu.transformPrecoder = transformPrecoder;
 
       // if transform precoding is enabled
-      if (transform_precoding == 0) {
+      if (transformPrecoder == transformPrecoder_enabled) {
    
         ul_config.ul_config_list[0].pusch_config_pdu.dfts_ofdm.low_papr_group_number = *scc->physCellId % 30;// U as defined in 38.211 section 6.4.1.1.1.2 
         ul_config.ul_config_list[0].pusch_config_pdu.dfts_ofdm.low_papr_sequence_number = 0;// V as defined in 38.211 section 6.4.1.1.1.2
@@ -1198,28 +1207,54 @@ int main(int argc, char **argv)
         LOG_D(PHY, "Sending Uplink data \n");
         nr_ue_pusch_common_procedures(UE,
                                       slot,
-                                      &UE->frame_parms,1);
-
-        if (n_trials==1) {
-          LOG_M("txsig0.m","txs0", UE->common_vars.txdata[0],frame_parms->samples_per_subframe*10,1,1);
+                                      &UE->frame_parms,
+									  frame_parms->nb_antennas_tx);
+        
+        
+        if (n_trials==1)
+        {
+          LOG_M("txsig0.m","txs0", &UE->common_vars.txdata[0][slot_offset],slot_length,1,1);
           LOG_M("txsig0F.m","txs0F", UE->common_vars.txdataF[0],frame_parms->ofdm_symbol_size*14,1,1);
+          if (precod_nbr_layers > 1)
+          {
+            LOG_M("txsig1.m","txs1", &UE->common_vars.txdata[1][slot_offset],slot_length,1,1);
+            LOG_M("txsig1F.m","txs1F", UE->common_vars.txdataF[1],frame_parms->ofdm_symbol_size*14,1,1);
+            if (precod_nbr_layers==4)
+            {          
+              LOG_M("txsig2.m","txs2", &UE->common_vars.txdata[2][slot_offset],slot_length,1,1);
+              LOG_M("txsig3.m","txs3", &UE->common_vars.txdata[3][slot_offset],slot_length,1,1);
+
+              LOG_M("txsig2F.m","txs2F", UE->common_vars.txdataF[2],frame_parms->ofdm_symbol_size*14,1,1);
+              LOG_M("txsig3F.m","txs3F", UE->common_vars.txdataF[3],frame_parms->ofdm_symbol_size*14,1,1);
+            }
+          }
         }
         ///////////
         ////////////////////////////////////////////////////
         tx_offset = frame_parms->get_samples_slot_timestamp(slot,frame_parms,0);
-
-        txlev = signal_energy(&UE->common_vars.txdata[0][tx_offset + 5*frame_parms->ofdm_symbol_size + 4*frame_parms->nb_prefix_samples + frame_parms->nb_prefix_samples0],
+        txlev_sum = 0;
+        for (int aa=0; aa<frame_parms->nb_antennas_tx; aa++) 
+        {
+          atxlev[aa] = signal_energy(&UE->common_vars.txdata[aa][tx_offset + 5*frame_parms->ofdm_symbol_size + 4*frame_parms->nb_prefix_samples + frame_parms->nb_prefix_samples0],
                               frame_parms->ofdm_symbol_size + frame_parms->nb_prefix_samples);
-      }	
-      else n_trials = 1;
-
-      if (input_fd == NULL ) {
+                              
+          txlev_sum += atxlev[aa];               
+          
+          if (n_trials==1) printf("txlev[%d] = %d (%f dB) txlev_sum %d\n",aa,atxlev[aa],10*log10((double)atxlev[aa]),txlev_sum);
+        }
+      } 
+      else
+      { 
+        n_trials = 1;
+      }
 
-        sigma_dB = 10 * log10((double)txlev * ((double)frame_parms->ofdm_symbol_size/(12*nb_rb))) - SNR;;
+      if (input_fd == NULL ) 
+      {
+        sigma_dB = 10 * log10((double)txlev_sum * ((double)frame_parms->ofdm_symbol_size/(12*nb_rb))) - SNR;;
         sigma    = pow(10,sigma_dB/10);
 
 
-        if(n_trials==1) printf("sigma %f (%f dB), txlev %f (factor %f)\n",sigma,sigma_dB,10*log10((double)txlev),(double)(double)
+        if(n_trials==1) printf("sigma %f (%f dB), txlev_sum %f (factor %f)\n",sigma,sigma_dB,10*log10((double)txlev_sum),(double)(double)
                                 frame_parms->ofdm_symbol_size/(12*nb_rb));
 
         for (i=0; i<slot_length; i++) {
@@ -1230,15 +1265,54 @@ int main(int argc, char **argv)
         }
 
 
-        if (UE2gNB->max_Doppler == 0) {
+        if (UE2gNB->max_Doppler == 0) 
+        {
           multipath_channel(UE2gNB, s_re, s_im, r_re, r_im, slot_length, 0, (n_trials==1)?1:0);
         } else {
           multipath_tv_channel(UE2gNB, s_re, s_im, r_re, r_im, 2*slot_length, 0);
         }
-        for (i=0; i<slot_length; i++) {
-          for (ap=0; ap<frame_parms->nb_antennas_rx; ap++) {
+        for (i=0; i<slot_length; i++) 
+        {
+          for (ap=0; ap<frame_parms->nb_antennas_rx; ap++) 
+          {
+            if (channel_model == AWGN) 
+            {
+              double H_awgn[4][4] ={{1.0, 0.0, 0.0, 0.0},//rx 0
+                                    {0.0, 1.0, 0.0, 0.0},  //rx 1
+                                    {0.0, 0.0, 1.0, 0.0},  //rx 2
+                                    {0.0, 0.0, 0.0, 1.0}};//rx 3
+              #if 0
+              double H_awgn_mimo[4][4] ={{1.0, 0.5, 0.25, 0.125},//rx 0
+                                   {0.5, 1.0, 0.5, 0.25},  //rx 1
+                                   {0.25, 0.5, 1.0, 0.5},  //rx 2
+                                   {0.125, 0.25, 0.5, 1.0}};//rx 3
+              #endif
+              // sum up signals from different Tx antennas
+              r_re[ap][i] = 0;
+              r_im[ap][i] = 0;
+             
+              for (int aa=0; aa<n_tx; aa++) 
+              {
+                r_re[ap][i] += s_re[aa][i]*H_awgn[ap][aa];
+                r_im[ap][i] += s_im[aa][i]*H_awgn[ap][aa];
+              }
+            }
+            
+            sigma_dB = 10 * log10((double)atxlev[ap] * ((double)frame_parms->ofdm_symbol_size/(12*nb_rb))) - SNR;;
+            sigma    = pow(10,sigma_dB/10);
+		
             ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)   + (delay*2)] = (int16_t)((r_re[ap][i]) + (sqrt(sigma/2)*gaussdouble(0.0,1.0))); // convert to fixed point
             ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)+1 + (delay*2)] = (int16_t)((r_im[ap][i]) + (sqrt(sigma/2)*gaussdouble(0.0,1.0)));
+            
+            #if 0
+            ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)   + (delay*2)] = (int16_t)((r_re[ap][i])); // convert to fixed point
+            ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)+1 + (delay*2)] = (int16_t)((r_im[ap][i]));
+
+            
+            ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i) ] = ((int16_t*) &UE->common_vars.txdata[ap][slot_offset])[(2*i) ]; // convert to fixed point
+            ((int16_t*) &gNB->common_vars.rxdata[ap][slot_offset])[(2*i)+1] = ((int16_t*) &UE->common_vars.txdata[ap][slot_offset])[(2*i)+1 ];
+            #endif
+
             /* Add phase noise if enabled */
             if (pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
               phase_noise(ts, &((int16_t*)&gNB->common_vars.rxdata[ap][slot_offset])[(2*i)],
@@ -1250,7 +1324,8 @@ int main(int argc, char **argv)
       } /*End input_fd */
 
 
-      if(pusch_pdu->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) {
+      if(pusch_pdu->pdu_bit_map & PUSCH_PDU_BITMAP_PUSCH_PTRS) 
+      {
         set_ptrs_symb_idx(&ptrsSymPos,
                           pusch_pdu->nr_of_symbols,
                           pusch_pdu->start_symbol_index,
@@ -1268,24 +1343,37 @@ int main(int argc, char **argv)
 	gNB->UL_INFO.rx_ind.number_of_pdus = 0;
 	gNB->UL_INFO.crc_ind.number_crcs = 0;
 
-        phy_procedures_gNB_common_RX(gNB, frame, slot);
+    phy_procedures_gNB_common_RX(gNB, frame, slot);
 
-        ul_proc_error = phy_procedures_gNB_uespec_RX(gNB, frame, slot);
+    ul_proc_error = phy_procedures_gNB_uespec_RX(gNB, frame, slot);
 
-	if (n_trials==1 && round==0) {
+	if (n_trials==1 && round==0) 
+    {
 	  LOG_M("rxsig0.m","rx0",&gNB->common_vars.rxdata[0][slot_offset],slot_length,1,1);
 
-	  LOG_M("rxsigF0.m","rxsF0",gNB->common_vars.rxdataF[0]+start_symbol*frame_parms->ofdm_symbol_size,nb_symb_sch*frame_parms->ofdm_symbol_size,1,1);
-
+	  LOG_M("rxsigF0.m","rxsF0",gNB->common_vars.rxdataF[0],14*frame_parms->ofdm_symbol_size,1,1);
+      if (precod_nbr_layers > 1)
+      {
+          LOG_M("rxsig1.m","rx1",&gNB->common_vars.rxdata[1][slot_offset],slot_length,1,1);
+          LOG_M("rxsigF1.m","rxsF1",gNB->common_vars.rxdataF[1],14*frame_parms->ofdm_symbol_size,1,1);
+          if (precod_nbr_layers==4)
+          {      
+            LOG_M("rxsig2.m","rx2",&gNB->common_vars.rxdata[2][slot_offset],slot_length,1,1);
+            LOG_M("rxsig3.m","rx3",&gNB->common_vars.rxdata[3][slot_offset],slot_length,1,1);
+            
+            LOG_M("rxsigF2.m","rxsF2",gNB->common_vars.rxdataF[2],14*frame_parms->ofdm_symbol_size,1,1);
+            LOG_M("rxsigF3.m","rxsF3",gNB->common_vars.rxdataF[3],14*frame_parms->ofdm_symbol_size,1,1);
+          }
+      }
 	}
 
 
 	if (n_trials == 1  && round==0) {
-#ifdef __AVX2__
+  #ifdef __AVX2__
 	  int off = ((nb_rb&1) == 1)? 4:0;
-#else
+  #else
 	  int off = 0;
-#endif
+  #endif
 
 	  LOG_M("rxsigF0_ext.m","rxsF0_ext",
 		&gNB->pusch_vars[0]->rxdataF_ext[0][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
@@ -1298,11 +1386,77 @@ int main(int argc, char **argv)
 		(nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
 	  LOG_M("rxsigF0_comp.m","rxsF0_comp",
 		&gNB->pusch_vars[0]->rxdataF_comp[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
-    LOG_M("chmagF0.m","chmF0",
-    &gNB->pusch_vars[0]->ul_ch_mag[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
-    LOG_M("chmagbF0.m","chmbF0",
-    &gNB->pusch_vars[0]->ul_ch_magb[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
-    if (n_rx == 2) {
+      LOG_M("chmagF0.m","chmF0",
+        &gNB->pusch_vars[0]->ul_ch_mag[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+      LOG_M("chmagbF0.m","chmbF0",
+        &gNB->pusch_vars[0]->ul_ch_magb[0][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+      LOG_M("rxsigF0_llrlayers0.m","rxsF0_llrlayers0",
+            &gNB->pusch_vars[0]->llr_layers[0][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
+    
+      if (precod_nbr_layers==2)
+      {
+         LOG_M("rxsigF1_ext.m","rxsF1_ext",
+            &gNB->pusch_vars[0]->rxdataF_ext[1][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+			  
+        LOG_M("chestF3.m","chF3",
+            &gNB->pusch_vars[0]->ul_ch_estimates[3][start_symbol*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
+        
+        LOG_M("chestF3_ext.m","chF3_ext",
+        &gNB->pusch_vars[0]->ul_ch_estimates_ext[3][(start_symbol+1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],
+              (nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+
+        LOG_M("rxsigF2_comp.m","rxsF2_comp",
+            &gNB->pusch_vars[0]->rxdataF_comp[2][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+        
+        LOG_M("rxsigF0_llrlayers1.m","rxsF0_llrlayers1",
+        &gNB->pusch_vars[0]->llr_layers[1][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
+
+        }
+        
+        if (precod_nbr_layers==4)
+        {
+          LOG_M("rxsigF1_ext.m","rxsF1_ext",
+            &gNB->pusch_vars[0]->rxdataF_ext[1][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+         LOG_M("rxsigF2_ext.m","rxsF2_ext",
+            &gNB->pusch_vars[0]->rxdataF_ext[2][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+        LOG_M("rxsigF3_ext.m","rxsF3_ext",
+            &gNB->pusch_vars[0]->rxdataF_ext[3][start_symbol*NR_NB_SC_PER_RB * pusch_pdu->rb_size],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+        
+        LOG_M("chestF5.m","chF5",
+            &gNB->pusch_vars[0]->ul_ch_estimates[5][start_symbol*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
+        LOG_M("chestF10.m","chF10",
+            &gNB->pusch_vars[0]->ul_ch_estimates[10][start_symbol*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
+        LOG_M("chestF15.m","chF15",
+            &gNB->pusch_vars[0]->ul_ch_estimates[15][start_symbol*frame_parms->ofdm_symbol_size],frame_parms->ofdm_symbol_size,1,1);
+
+
+        LOG_M("chestF5_ext.m","chF5_ext",
+        &gNB->pusch_vars[0]->ul_ch_estimates_ext[5][(start_symbol+1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],
+              (nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+        LOG_M("chestF10_ext.m","chF10_ext",
+        &gNB->pusch_vars[0]->ul_ch_estimates_ext[10][(start_symbol+1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],
+              (nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+        LOG_M("chestF15_ext.m","chF15_ext",
+        &gNB->pusch_vars[0]->ul_ch_estimates_ext[15][(start_symbol+1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],
+             (nb_symb_sch-1)*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+
+
+        LOG_M("rxsigF4_comp.m","rxsF4_comp",
+            &gNB->pusch_vars[0]->rxdataF_comp[4][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+        LOG_M("rxsigF8_comp.m","rxsF8_comp",
+            &gNB->pusch_vars[0]->rxdataF_comp[8][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+        LOG_M("rxsigF12_comp.m","rxsF12_comp",
+            &gNB->pusch_vars[0]->rxdataF_comp[12][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
+        LOG_M("rxsigF0_llrlayers1.m","rxsF0_llrlayers1",
+        &gNB->pusch_vars[0]->llr_layers[1][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
+        LOG_M("rxsigF0_llrlayers2.m","rxsF0_llrlayers2",
+        &gNB->pusch_vars[0]->llr_layers[2][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
+        LOG_M("rxsigF0_llrlayers3.m","rxsF0_llrlayers3",
+        &gNB->pusch_vars[0]->llr_layers[3][0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
+    }
+		
+    #if 0		
+	if (n_rx == 2) {
       LOG_MM("rxsigF0_comp.m","rxsF1_comp",
       &gNB->pusch_vars[0]->rxdataF_comp[1][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
       LOG_MM("rxsigF0_ext.m","rxsF1_ext",
@@ -1349,9 +1503,10 @@ int main(int argc, char **argv)
       LOG_MM("chmagbF0.m","chmbF3",
       &gNB->pusch_vars[0]->ul_ch_magb[3][start_symbol*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size))],nb_symb_sch*(off+(NR_NB_SC_PER_RB * pusch_pdu->rb_size)),1,1);
     }
+	#endif
 
 	  LOG_M("rxsigF0_llr.m","rxsF0_llr",
-		&gNB->pusch_vars[0]->llr[0],(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
+		&gNB->pusch_vars[0]->llr[0],precod_nbr_layers*(nb_symb_sch-1)*NR_NB_SC_PER_RB * pusch_pdu->rb_size * mod_order,1,0);
 	}
         ////////////////////////////////////////////////////////////
 
diff --git a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
index 3f4be9dd58c..7e5be9eebad 100644
--- a/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
+++ b/openair2/LAYER2/NR_MAC_COMMON/nr_mac_common.c
@@ -2708,6 +2708,9 @@ uint16_t nr_dci_size(const NR_BWP_DownlinkCommon_t *initialDownlinkBWP,
       LOG_D(NR_MAC,"dci_pdu->srs_resource_indicator.nbits %d\n",dci_pdu->srs_resource_indicator.nbits);
       // Precoding info and number of layers
       long transformPrecoder = get_transformPrecoding(initialUplinkBWP, pusch_Config, ubwpd, (uint8_t*)&format, rnti_type, 0);
+       
+	  pusch_antenna_ports = *pusch_Config->maxRank;
+	   
       dci_pdu->precoding_information.nbits=0;
       if (pusch_Config && 
           pusch_Config->txConfig != NULL){
diff --git a/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c b/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
index c23d4bb0730..963723725f5 100644
--- a/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
+++ b/openair2/LAYER2/NR_MAC_UE/nr_ue_scheduler.c
@@ -190,7 +190,7 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
     else
       transformPrecoder = NR_PUSCH_Config__transformPrecoder_disabled;
   }
-
+  pusch_config_pdu->transformPrecoder = transformPrecoder;
 
   /* PRECOD_NBR_LAYERS */
   if ((*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_nonCodebook));
@@ -198,11 +198,12 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
 
   if ((*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_codebook)){
 
-    uint8_t n_antenna_port = 0; //FIXME!!!
+    uint8_t n_antenna_port = mac->phy_config.config_req.carrier_config.num_tx_ant;   
 
     if (n_antenna_port == 1); // 1 antenna port and the higher layer parameter txConfig = codebook 0 bits
 
-    if (n_antenna_port == 4){ // 4 antenna port and the higher layer parameter txConfig = codebook
+    if (n_antenna_port == 4)
+    { // 4 antenna port and the higher layer parameter txConfig = codebook
 
       // Table 7.3.1.1.2-2: transformPrecoder=disabled and maxRank = 2 or 3 or 4
       if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled)
@@ -212,54 +213,56 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_fullyAndPartialAndNonCoherent) {
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][0];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][1];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][1];
         }
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_partialAndNonCoherent){
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][2];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][3];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][3];
         }
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent){
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][4];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][5];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][5];
         }
       }
 
       // Table 7.3.1.1.2-3: transformPrecoder= enabled, or transformPrecoder=disabled and maxRank = 1
       if (((transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled)
         || (transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled))
-        && (*pusch_Config->maxRank == 1)){
+        && (*pusch_Config->maxRank == 1))
+      {
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_fullyAndPartialAndNonCoherent) {
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][6];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][7];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][7];
         }
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_partialAndNonCoherent){
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][8];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][9];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][9];
         }
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent){
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][10];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][11];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][11];
         }
       }
     }
 
-    if (n_antenna_port == 4){ // 2 antenna port and the higher layer parameter txConfig = codebook
+    if (n_antenna_port == 2)
+    { // 2 antenna port and the higher layer parameter txConfig = codebook
       // Table 7.3.1.1.2-4: transformPrecoder=disabled and maxRank = 2
-      if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) && (*pusch_Config->maxRank == 2)){
-
+      if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) && (*pusch_Config->maxRank == 2))
+      {
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_fullyAndPartialAndNonCoherent) {
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][12];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][13];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][13];
         }
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent){
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][14];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][15];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][15];
         }
 
       }
@@ -267,16 +270,17 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
       // Table 7.3.1.1.2-5: transformPrecoder= enabled, or transformPrecoder= disabled and maxRank = 1
       if (((transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled)
         || (transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled))
-        && (*pusch_Config->maxRank == 1)){
+        && (*pusch_Config->maxRank == 1))
+      {
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_fullyAndPartialAndNonCoherent) {
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][16];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][17];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][17];
         }
 
         if (*pusch_Config->codebookSubset == NR_PUSCH_Config__codebookSubset_nonCoherent){
           pusch_config_pdu->nrOfLayers = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][18];
-          pusch_config_pdu->transform_precoding = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][19];
+          pusch_config_pdu->Tpmi = table_7_3_1_1_2_2_3_4_5[dci->precoding_information.val][19];
         }
 
       }
@@ -285,9 +289,9 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
 
   /*-------------------- Changed to enable Transform precoding in RF SIM------------------------------------------------*/
 
- /*if (pusch_config_pdu->transform_precoding == transform_precoder_enabled) {
+ /*if (pusch_config_pdu->transformPrecoder == transformPrecoder_enabled) {
 
-    pusch_config_dedicated->transform_precoder = transform_precoder_enabled;
+    pusch_config_dedicated->transform_precoder = transformPrecoder_enabled;
 
     if(pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA != NULL) {
 
@@ -309,7 +313,7 @@ void ul_layers_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_con
 
     }
   } else
-    pusch_config_dedicated->transform_precoder = transform_precoder_disabled;*/
+    pusch_config_dedicated->transformPrecoder = ttransformPrecoder_disabled;*/
 }
 
 // todo: this function shall be reviewed completely because of the many comments left by the author
@@ -342,7 +346,7 @@ void ul_ports_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_conf
   }
   long *max_length = NULL;
   long *dmrs_type = NULL;
-  LOG_D(NR_MAC,"transformPrecoder %s\n",transformPrecoder==NR_PUSCH_Config__transformPrecoder_disabled?"disabled":"enabled");
+  LOG_I(NR_MAC,"transformPrecoder %s\n",transformPrecoder==NR_PUSCH_Config__transformPrecoder_disabled?"disabled":"enabled");
 
   if (pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA) {
     max_length = pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA->choice.setup->maxLength;
@@ -353,7 +357,7 @@ void ul_ports_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_conf
     dmrs_type = pusch_Config->dmrs_UplinkForPUSCH_MappingTypeB->choice.setup->dmrs_Type;
   }
 
-  LOG_D(NR_MAC,"MappingType%s max_length %s, dmrs_type %s, antenna_ports %d\n",pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA?"A":"B",max_length?"len2":"len1",dmrs_type?"type2":"type1",dci->antenna_ports.val);
+  LOG_I(NR_MAC,"MappingType%s max_length %s, dmrs_type %s, antenna_ports %d\n",pusch_Config->dmrs_UplinkForPUSCH_MappingTypeA?"A":"B",max_length?"len2":"len1",dmrs_type?"type2":"type1",dci->antenna_ports.val);
   if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled) &&
     (dmrs_type == NULL) && (max_length == NULL)) { // tables 7.3.1.1.2-6
       pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2; //TBC
@@ -369,35 +373,32 @@ void ul_ports_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_conf
   }
 
   if ((transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) &&
-    (dmrs_type == NULL) && (max_length == NULL)) { // tables 7.3.1.1.2-8/9/10/11
+    (dmrs_type == NULL) && (max_length == NULL)) 
+  { // tables 7.3.1.1.2-8/9/10/11
+    
+    if ((*pusch_Config->txConfig == NR_PUSCH_Config__txConfig_codebook))
+    {
+      rank = pusch_config_pdu->nrOfLayers;
+    } 
 
     if (rank == 1) {
-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = (dci->antenna_ports.val > 1)?2:1; //TBC
-      pusch_config_pdu->dmrs_ports = (dci->antenna_ports.val > 1)?(dci->antenna_ports.val-2):(dci->antenna_ports.val); //TBC
+      pusch_config_pdu->num_dmrs_cdm_grps_no_data = (dci->antenna_ports.val > 1)?2:1;
+      pusch_config_pdu->dmrs_ports =1<<((dci->antenna_ports.val > 1)?(dci->antenna_ports.val-2):(dci->antenna_ports.val));
     }
 
     if (rank == 2){
-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = (dci->antenna_ports.val > 0)?2:1; //TBC
-      pusch_config_pdu->dmrs_ports = 0; //FIXME
-      //pusch_config_pdu->dmrs_ports[0] = (dci->antenna_ports > 1)?(dci->antenna_ports > 2 ?0:2):0;
-      //pusch_config_pdu->dmrs_ports[1] = (dci->antenna_ports > 1)?(dci->antenna_ports > 2 ?2:3):1;
+      pusch_config_pdu->num_dmrs_cdm_grps_no_data = (dci->antenna_ports.val > 0)?2:1;
+      pusch_config_pdu->dmrs_ports = (dci->antenna_ports.val > 1)?((dci->antenna_ports.val> 2)?0x5:0xc):0x3;
     }
 
     if (rank == 3){
-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2; //TBC
-      pusch_config_pdu->dmrs_ports = 0; //FIXME
-      //pusch_config_pdu->dmrs_ports[0] = 0;
-      //pusch_config_pdu->dmrs_ports[1] = 1;
-      //pusch_config_pdu->dmrs_ports[2] = 2;
+      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2;
+      pusch_config_pdu->dmrs_ports = ((1<<rank)-1);
     }
 
     if (rank == 4){
-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2; //TBC
-      pusch_config_pdu->dmrs_ports = 0; //FIXME
-      //pusch_config_pdu->dmrs_ports[0] = 0;
-      //pusch_config_pdu->dmrs_ports[1] = 1;
-      //pusch_config_pdu->dmrs_ports[2] = 2;
-      //pusch_config_pdu->dmrs_ports[3] = 3;
+      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2;
+      pusch_config_pdu->dmrs_ports = ((1<<rank)-1);
     }
   }
 
@@ -508,7 +509,7 @@ void ul_ports_config(NR_UE_MAC_INST_t * mac, nfapi_nr_ue_pusch_pdu_t *pusch_conf
       //pusch_config_pdu->n_front_load_symb = table_7_3_1_1_2_23[dci->antenna_ports.val][5]; //FIXME
     }
   }
-  LOG_D(NR_MAC,"num_dmrs_cdm_grps_no_data %d, dmrs_ports %d\n",pusch_config_pdu->num_dmrs_cdm_grps_no_data,pusch_config_pdu->dmrs_ports);
+  LOG_I(NR_MAC,"num_dmrs_cdm_grps_no_data %d, dmrs_ports %d\n",pusch_config_pdu->num_dmrs_cdm_grps_no_data,pusch_config_pdu->dmrs_ports);
 }
 
 // Configuration of Msg3 PDU according to clauses:
@@ -542,6 +543,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
   pusch_config_pdu->dmrs_config_type = pusch_dmrs_type1;
   pusch_config_pdu->pdu_bit_map      = PUSCH_PDU_BITMAP_PUSCH_DATA;
   pusch_config_pdu->nrOfLayers       = 1;
+  pusch_config_pdu->Tpmi             = 0;
   pusch_config_pdu->rnti             = rnti;
   NR_BWP_UplinkCommon_t *initialUplinkBWP;
   if (mac->scc) initialUplinkBWP = mac->scc->uplinkConfigCommon->initialUplinkBWP;
@@ -634,7 +636,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
     pusch_config_pdu->scid = 0;
 
     // Transform precoding according to 6.1.3 UE procedure for applying transform precoding on PUSCH in 38.214
-    pusch_config_pdu->transform_precoding = get_transformPrecoding(initialUplinkBWP, pusch_Config, NULL, NULL, NR_RNTI_TC, 0); // TBR fix rnti and take out
+    pusch_config_pdu->transformPrecoder = get_transformPrecoding(initialUplinkBWP, pusch_Config, NULL, NULL, NR_RNTI_TC, 0); // TBR fix rnti and take out
 
     // Resource allocation in frequency domain according to 6.1.2.2 in TS 38.214
     pusch_config_pdu->resource_alloc = (mac->cg) ? pusch_Config->resourceAllocation : 1;
@@ -680,7 +682,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
 
     /* Transform precoding */
     if (rnti_type != NR_RNTI_CS || (rnti_type == NR_RNTI_CS && dci->ndi == 1)) {
-      pusch_config_pdu->transform_precoding = get_transformPrecoding(initialUplinkBWP, pusch_Config, NULL, dci_format, rnti_type, 0);
+      pusch_config_pdu->transformPrecoder = get_transformPrecoding(initialUplinkBWP, pusch_Config, NULL, dci_format, rnti_type, 0);
     }
 
     /*DCI format-related configuration*/
@@ -716,7 +718,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
 
     /* TRANSFORM PRECODING ------------------------------------------------------------------------------------------*/
 
-    if (pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled) {
+    if (pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled) {
 
       pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2;
 
@@ -761,7 +763,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
     pusch_config_pdu->mcs_index = dci->mcs;
 
     /* MCS TABLE */
-    if (pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_disabled) {
+    if (pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) {
       pusch_config_pdu->mcs_table = get_pusch_mcs_table(pusch_Config ? pusch_Config->mcs_Table : NULL, 0, *dci_format, rnti_type, target_ss, false);
     } else {
       pusch_config_pdu->mcs_table = get_pusch_mcs_table(pusch_Config ? pusch_Config->mcs_TableTransformPrecoder : NULL, 1, *dci_format, rnti_type, target_ss, false);
@@ -798,9 +800,14 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
                                mappingtype, add_pos, dmrslength,
                                pusch_config_pdu->start_symbol_index,
                                mac->scc ? mac->scc->dmrs_TypeA_Position : mac->mib->dmrs_TypeA_Position);
-    if ((mac->ULbwp[0] && pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_disabled))
-      pusch_config_pdu->num_dmrs_cdm_grps_no_data = 1;
-    else if (*dci_format == NR_UL_DCI_FORMAT_0_0 || (mac->ULbwp[0] && pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled))
+    if (mac->ULbwp[0] && pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled)
+    { 
+	  if (*dci_format != NR_UL_DCI_FORMAT_0_1)
+	  {
+        pusch_config_pdu->num_dmrs_cdm_grps_no_data = 1;
+	  }
+    }
+    else if (*dci_format == NR_UL_DCI_FORMAT_0_0 || (mac->ULbwp[0] && pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled))
       pusch_config_pdu->num_dmrs_cdm_grps_no_data = 2;
 
     // Num PRB Overhead from PUSCH-ServingCellConfig
@@ -821,7 +828,7 @@ int nr_config_pusch_pdu(NR_UE_MAC_INST_t *mac,
 	mac->ULbwp[0]->bwp_Dedicated->pusch_Config->choice.setup &&
 	mac->ULbwp[0]->bwp_Dedicated->pusch_Config->choice.setup->dmrs_UplinkForPUSCH_MappingTypeB &&
 	mac->ULbwp[0]->bwp_Dedicated->pusch_Config->choice.setup->dmrs_UplinkForPUSCH_MappingTypeB->choice.setup->phaseTrackingRS) {
-      if (pusch_config_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_disabled) {
+      if (pusch_config_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_disabled) {
         nfapi_nr_ue_ptrs_ports_t ptrs_ports_list;
         pusch_config_pdu->pusch_ptrs.ptrs_ports_list = &ptrs_ports_list;
         valid_ptrs_setup = set_ul_ptrs_values(mac->ULbwp[0]->bwp_Dedicated->pusch_Config->choice.setup->dmrs_UplinkForPUSCH_MappingTypeB->choice.setup->phaseTrackingRS->choice.setup,
@@ -909,6 +916,7 @@ NR_UE_L2_STATE_t nr_ue_scheduler(nr_downlink_indication_t *dl_info, nr_uplink_in
 
       fill_scheduled_response(&scheduled_response, &dcireq.dl_config_req, NULL, NULL, mod_id, cc_id, rx_frame, rx_slot, dl_info->thread_id);
       if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL)
+        LOG_I(NR_MAC,"1# scheduled_response transmitted, %d, %d\n", rx_frame, rx_slot);
         mac->if_module->scheduled_response(&scheduled_response);
     }
     else {
@@ -1022,6 +1030,7 @@ NR_UE_L2_STATE_t nr_ue_scheduler(nr_downlink_indication_t *dl_info, nr_uplink_in
 
         fill_scheduled_response(&scheduled_response, NULL, ul_config, &tx_req, mod_id, cc_id, rx_frame, rx_slot, ul_info->thread_id);
         if(mac->if_module != NULL && mac->if_module->scheduled_response != NULL){
+          LOG_I(NR_MAC,"3# scheduled_response transmitted,%d, %d\n", frame_tx, slot_tx);
           mac->if_module->scheduled_response(&scheduled_response);
         }
         pthread_mutex_lock(&ul_config->mutex_ul_config);
diff --git a/openair2/LAYER2/NR_MAC_gNB/config.c b/openair2/LAYER2/NR_MAC_gNB/config.c
index 1cb3fdb7084..d9c98852a34 100644
--- a/openair2/LAYER2/NR_MAC_gNB/config.c
+++ b/openair2/LAYER2/NR_MAC_gNB/config.c
@@ -388,7 +388,7 @@ void config_common(int Mod_idP, int ssb_SubcarrierOffset, int pdsch_AntennaPorts
   cfg->carrier_config.num_rx_ant.value = pusch_AntennaPorts;
   AssertFatal(pusch_AntennaPorts > 0 && pusch_AntennaPorts < 13, "pusch_AntennaPorts in 1...12\n");
   cfg->carrier_config.num_rx_ant.tl.tag = NFAPI_NR_CONFIG_NUM_RX_ANT_TAG;
-  LOG_I(NR_MAC,"Set TX/RX antenna number to %d (num ssb %d: %x,%x)\n",cfg->carrier_config.num_tx_ant.value,num_ssb,cfg->ssb_table.ssb_mask_list[0].ssb_mask.value,cfg->ssb_table.ssb_mask_list[1].ssb_mask.value);
+  LOG_I(NR_MAC,"Set RX antenna number to %d, Set TX antenna number to %d (num ssb %d: %x,%x)\n",cfg->carrier_config.num_tx_ant.value,cfg->carrier_config.num_rx_ant.value,num_ssb,cfg->ssb_table.ssb_mask_list[0].ssb_mask.value,cfg->ssb_table.ssb_mask_list[1].ssb_mask.value);
   AssertFatal(cfg->carrier_config.num_tx_ant.value > 0,"carrier_config.num_tx_ant.value %d !\n",cfg->carrier_config.num_tx_ant.value );
   cfg->num_tlv++;
   cfg->num_tlv++;
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
index 6ac7f3f82e9..5de36bb2db2 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_RA.c
@@ -977,9 +977,9 @@ void fill_msg3_pusch_pdu(nfapi_nr_pusch_pdu_t *pusch_pdu,
   pusch_pdu->target_code_rate = nr_get_code_rate_ul(pusch_pdu->mcs_index,pusch_pdu->mcs_table);
   pusch_pdu->qam_mod_order = nr_get_Qm_ul(pusch_pdu->mcs_index,pusch_pdu->mcs_table);
   if (scc->uplinkConfigCommon->initialUplinkBWP->rach_ConfigCommon->choice.setup->msg3_transformPrecoder == NULL)
-    pusch_pdu->transform_precoding = 1;
+    pusch_pdu->transformPrecoder = 1;
   else
-    pusch_pdu->transform_precoding = 0;
+    pusch_pdu->transformPrecoder = 0;
   pusch_pdu->data_scrambling_id = *scc->physCellId;
   pusch_pdu->nrOfLayers = 1;
   pusch_pdu->ul_dmrs_symb_pos = get_l_prime(nr_of_symbols,mappingtype,pusch_dmrs_pos2,pusch_len1,start_symbol_index, scc->dmrs_TypeA_Position);
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
index abc007197d9..9cbbe7d9a18 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
@@ -1139,13 +1139,14 @@ void nr_schedule_ue_spec(module_id_t module_id,
     dci_payload.dmrs_sequence_initialization.val = pdsch_pdu->SCID;
     LOG_D(NR_MAC,
           "%4d.%2d DCI type 1 payload: freq_alloc %d (%d,%d,%d), "
-          "time_alloc %d, vrb to prb %d, mcs %d tb_scaling %d ndi %d rv %d tpc %d ti %d\n",
+          "nrOfLayers %d, time_alloc %d, vrb to prb %d, mcs %d tb_scaling %d ndi %d rv %d tpc %d ti %d\n",
           frame,
           slot,
           dci_payload.frequency_domain_assignment.val,
           pdsch_pdu->rbStart,
           pdsch_pdu->rbSize,
           pdsch_pdu->BWPSize,
+          pdsch_pdu->nrOfLayers,
           dci_payload.time_domain_assignment.val,
           dci_payload.vrb_to_prb_mapping.val,
           dci_payload.mcs,
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
index f1005cea70d..8f7c856a307 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_phytest.c
@@ -405,6 +405,7 @@ void nr_preprocessor_phytest(module_id_t module_id,
 
 uint32_t target_ul_mcs = 9;
 uint32_t target_ul_bw = 50;
+uint32_t target_ul_Nl = 1;
 uint64_t ulsch_slot_bitmap = (1 << 8);
 bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_t slot)
 {
@@ -446,15 +447,21 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
 
   const long f = sched_ctrl->search_space->searchSpaceType->choice.ue_Specific->dci_Formats;
   const int dci_format = f ? NR_UL_DCI_FORMAT_0_1 : NR_UL_DCI_FORMAT_0_0;
-  const uint8_t num_dmrs_cdm_grps_no_data = 1;
+  uint8_t num_dmrs_cdm_grps_no_data = 1;
+  if ((target_ul_Nl==4)||(target_ul_Nl==3))
+  {
+    num_dmrs_cdm_grps_no_data = 2;
+  }
+  
   /* we want to avoid a lengthy deduction of DMRS and other parameters in
    * every TTI if we can save it, so check whether dci_format, TDA, or
    * num_dmrs_cdm_grps_no_data has changed and only then recompute */
   NR_pusch_semi_static_t *ps = &sched_ctrl->pusch_semi_static;
   if (ps->time_domain_allocation != tda
       || ps->dci_format != dci_format
+      || ps->nrOfLayers != target_ul_Nl
       || ps->num_dmrs_cdm_grps_no_data != num_dmrs_cdm_grps_no_data)
-    nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, NULL,dci_format, tda, num_dmrs_cdm_grps_no_data, ps);
+    nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, NULL,dci_format, tda, num_dmrs_cdm_grps_no_data,target_ul_Nl,ps);
 
   uint16_t rbStart = 0;
   uint16_t rbSize;
@@ -525,6 +532,7 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
   sched_pusch->ul_harq_pid = sched_ctrl->retrans_ul_harq.head;
 
   /* Calculate TBS from MCS */
+  ps->nrOfLayers = target_ul_Nl;
   sched_pusch->R = nr_get_code_rate_ul(mcs, ps->mcs_table);
   sched_pusch->Qm = nr_get_Qm_ul(mcs, ps->mcs_table);
   if (ps->pusch_Config->tp_pi2BPSK
@@ -539,7 +547,7 @@ bool nr_ul_preprocessor_phytest(module_id_t module_id, frame_t frame, sub_frame_
                                         ps->N_PRB_DMRS * ps->num_dmrs_symb,
                                         0, // nb_rb_oh
                                         0,
-                                        1 /* NrOfLayers */)
+                                        ps->nrOfLayers /* NrOfLayers */)
                          >> 3;
 
   /* mark the corresponding RBs as used */
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
index 1c0cb0353e7..4583da876f4 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
@@ -355,6 +355,7 @@ void nr_set_pusch_semi_static(const NR_ServingCellConfigCommon_t *scc,
                               long dci_format,
                               int tda,
                               uint8_t num_dmrs_cdm_grps_no_data,
+                              uint8_t nrOfLayers,
                               NR_pusch_semi_static_t *ps)
 {
   ps->dci_format = dci_format;
@@ -371,11 +372,11 @@ void nr_set_pusch_semi_static(const NR_ServingCellConfigCommon_t *scc,
 
   ps->pusch_Config = ubwp?ubwp->bwp_Dedicated->pusch_Config->choice.setup:(ubwpd ? ubwpd->pusch_Config->choice.setup : NULL);
   if (ps->pusch_Config == NULL || !ps->pusch_Config->transformPrecoder)
-    ps->transform_precoding = !scc->uplinkConfigCommon->initialUplinkBWP->rach_ConfigCommon->choice.setup->msg3_transformPrecoder;
+    ps->transformPrecoder = !scc->uplinkConfigCommon->initialUplinkBWP->rach_ConfigCommon->choice.setup->msg3_transformPrecoder;
   else
-    ps->transform_precoding = *ps->pusch_Config->transformPrecoder;
+    ps->transformPrecoder = *ps->pusch_Config->transformPrecoder;
   const int target_ss = NR_SearchSpace__searchSpaceType_PR_ue_Specific;
-  if (ps->transform_precoding)
+  if (ps->transformPrecoder)
     ps->mcs_table = get_pusch_mcs_table(ps->pusch_Config ? ps->pusch_Config->mcs_Table : NULL,
                                     0,
                                     ps->dci_format,
@@ -392,6 +393,7 @@ void nr_set_pusch_semi_static(const NR_ServingCellConfigCommon_t *scc,
     num_dmrs_cdm_grps_no_data = 2; // in case of transform precoding - no Data sent in DMRS symbol
   }
 
+  ps->nrOfLayers = nrOfLayers;
   ps->num_dmrs_cdm_grps_no_data = num_dmrs_cdm_grps_no_data;
 
   /* DMRS calculations */
@@ -689,8 +691,21 @@ void config_uldci(const NR_BWP_Uplink_t *ubwp,
                     "Non Codebook configuration non supported\n");
         dci_pdu_rel15->srs_resource_indicator.val = 0; // taking resource 0 for SRS
       }
+      AssertFatal((pusch_pdu->Tpmi==0), "unsupport Tpmi\n");
+      dci_pdu_rel15->precoding_information.val= 0;
+      if (pusch_pdu->nrOfLayers == 2)
+      {
+        dci_pdu_rel15->precoding_information.val = 4;
+      }
+      else if (pusch_pdu->nrOfLayers == 4)
+      {
+        dci_pdu_rel15->precoding_information.val = 11;
+      }
+
+      // antenna_ports.val = 0 for transform precoder is disabled, dmrs-Type=1, maxLength=1, Rank=1/2/3/4 
       // Antenna Ports
-      dci_pdu_rel15->antenna_ports.val = 0; // TODO for now it is hardcoded, it should depends on cdm group no data and rank
+      dci_pdu_rel15->antenna_ports.val = 0; 
+      
       // DMRS sequence initialization
       dci_pdu_rel15->dmrs_sequence_initialization.val = pusch_pdu->scid;
       break;
@@ -699,11 +714,14 @@ void config_uldci(const NR_BWP_Uplink_t *ubwp,
   }
 
   LOG_D(NR_MAC,
-        "%s() ULDCI type 0 payload: freq_alloc %d, time_alloc %d, freq_hop_flag %d, mcs %d tpc %d ndi %d rv %d\n",
+        "%s() ULDCI type 0 payload: dci_format %d, freq_alloc %d, time_alloc %d, freq_hop_flag %d, precoding_information.val %d antenna_ports.val %d mcs %d tpc %d ndi %d rv %d\n",
         __func__,
+        dci_format,
         dci_pdu_rel15->frequency_domain_assignment.val,
         dci_pdu_rel15->time_domain_assignment.val,
         dci_pdu_rel15->frequency_hopping_flag.val,
+        dci_pdu_rel15->precoding_information.val,
+        dci_pdu_rel15->antenna_ports.val,
         dci_pdu_rel15->mcs,
         dci_pdu_rel15->tpc,
         dci_pdu_rel15->ndi,
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
index 9a6fa32b5f3..269d6c5359c 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_ulsch.c
@@ -905,7 +905,7 @@ bool allocate_ul_retransmission(module_id_t module_id,
   NR_BWP_t *genericParameters = sched_ctrl->active_ubwp ? &sched_ctrl->active_ubwp->bwp_Common->genericParameters : &scc->uplinkConfigCommon->initialUplinkBWP->genericParameters;
   int rbStart = 0; // wrt BWP start
   const uint16_t bwpSize = NRRIV2BW(genericParameters->locationAndBandwidth, MAX_BWP_SIZE);
-
+  const uint8_t nrOfLayers = 1;
   const uint8_t num_dmrs_cdm_grps_no_data = (sched_ctrl->active_bwp || ubwpd) ? 1 : 2;
   const int tda = sched_ctrl->active_ubwp ? RC.nrmac[module_id]->preferred_ul_tda[sched_ctrl->active_ubwp->bwp_Id][slot] : 0;
   LOG_D(NR_MAC,"retInfo->time_domain_allocation = %d, tda = %d\n", retInfo->time_domain_allocation, tda);
@@ -926,8 +926,9 @@ bool allocate_ul_retransmission(module_id_t module_id,
 
     if (ps->time_domain_allocation != tda
         || ps->dci_format != dci_format
+        || ps->nrOfLayers != nrOfLayers
         || ps->num_dmrs_cdm_grps_no_data != num_dmrs_cdm_grps_no_data)
-      nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, ps);
+      nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, nrOfLayers,ps);
     LOG_D(NR_MAC, "%s(): retransmission keeping TDA %d and TBS %d\n", __func__, tda, retInfo->tb_size);
   } else {
     /* the retransmission will use a different time domain allocation, check
@@ -939,7 +940,7 @@ bool allocate_ul_retransmission(module_id_t module_id,
       rbSize++;
     NR_pusch_semi_static_t temp_ps;
     int dci_format = get_dci_format(sched_ctrl);
-    nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp,ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, &temp_ps);
+    nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp,ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, nrOfLayers, &temp_ps);
     uint32_t new_tbs;
     uint16_t new_rbSize;
     bool success = nr_find_nb_rb(retInfo->Qm,
@@ -1094,7 +1095,7 @@ void pf_ul(module_id_t module_id,
       if (max_num_ue < 0)
         return;
 
-      LOG_D(NR_MAC,"Looking for min_rb %d RBs, starting at %d\n", min_rb, rbStart);
+      LOG_D(NR_MAC,"Looking for min_rb %d RBs, starting at %d num_dmrs_cdm_grps_no_data %d\n", min_rb, rbStart, ps->num_dmrs_cdm_grps_no_data);
       while (rbStart < bwpSize && !rballoc_mask[rbStart]) rbStart++;
       if (rbStart + min_rb >= bwpSize) {
         LOG_W(NR_MAC, "cannot allocate continuous UL data for UE %d/RNTI %04x: no resources (rbStart %d, min_rb %d, bwpSize %d\n",
@@ -1106,13 +1107,15 @@ void pf_ul(module_id_t module_id,
       /* we want to avoid a lengthy deduction of DMRS and other parameters in
        * every TTI if we can save it, so check whether dci_format, TDA, or
        * num_dmrs_cdm_grps_no_data has changed and only then recompute */
+      const uint8_t nrOfLayers = 1;
       const uint8_t num_dmrs_cdm_grps_no_data = (sched_ctrl->active_ubwp || ubwpd) ? 1 : 2;
       int dci_format = get_dci_format(sched_ctrl);
       const int tda = sched_ctrl->active_ubwp ? nrmac->preferred_ul_tda[sched_ctrl->active_ubwp->bwp_Id][slot] : 0;
       if (ps->time_domain_allocation != tda
           || ps->dci_format != dci_format
+          || ps->nrOfLayers != nrOfLayers
           || ps->num_dmrs_cdm_grps_no_data != num_dmrs_cdm_grps_no_data)
-        nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, ps);
+        nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data,nrOfLayers, ps);
       NR_sched_pusch_t *sched_pusch = &sched_ctrl->sched_pusch;
       sched_pusch->mcs = 9;
       update_ul_ue_R_Qm(sched_pusch, ps);
@@ -1125,7 +1128,7 @@ void pf_ul(module_id_t module_id,
                                             ps->N_PRB_DMRS * ps->num_dmrs_symb,
                                             0, // nb_rb_oh
                                             0,
-                                            1 /* NrOfLayers */)
+                                            ps->nrOfLayers)
                              >> 3;
 
       /* Mark the corresponding RBs as used */
@@ -1205,13 +1208,15 @@ void pf_ul(module_id_t module_id,
     /* we want to avoid a lengthy deduction of DMRS and other parameters in
      * every TTI if we can save it, so check whether dci_format, TDA, or
      * num_dmrs_cdm_grps_no_data has changed and only then recompute */
+    const uint8_t nrOfLayers = 1;
     const uint8_t num_dmrs_cdm_grps_no_data = (sched_ctrl->active_ubwp || ubwpd) ? 1 : 2;
     int dci_format = get_dci_format(sched_ctrl);
     const int tda = sched_ctrl->active_ubwp ? nrmac->preferred_ul_tda[sched_ctrl->active_ubwp->bwp_Id][slot] : 0;
     if (ps->time_domain_allocation != tda
         || ps->dci_format != dci_format
+        || ps->nrOfLayers != nrOfLayers
         || ps->num_dmrs_cdm_grps_no_data != num_dmrs_cdm_grps_no_data)
-      nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, ps);
+      nr_set_pusch_semi_static(scc, sched_ctrl->active_ubwp, ubwpd, dci_format, tda, num_dmrs_cdm_grps_no_data, nrOfLayers,ps);
     update_ul_ue_R_Qm(sched_pusch, ps);
 
     /* Calculate the current scheduling bytes and the necessary RBs */
@@ -1473,7 +1478,7 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
     sched_ctrl->last_ul_slot = sched_pusch->slot;
 
     LOG_D(NR_MAC,
-          "ULSCH/PUSCH: %4d.%2d RNTI %04x UL sched %4d.%2d DCI L %d start %2d RBS %3d startSymbol %2d nb_symbol %2d dmrs_pos %x MCS %2d TBS %4d HARQ PID %2d round %d RV %d NDI %d est %6d sched %6d est BSR %6d TPC %d\n",
+          "ULSCH/PUSCH: %4d.%2d RNTI %04x UL sched %4d.%2d DCI L %d start %2d RBS %3d startSymbol %2d nb_symbol %2d dmrs_pos %x MCS %2d nrOfLayers %2d num_dmrs_cdm_grps_no_data %2d TBS %4d HARQ PID %2d round %d RV %d NDI %d est %6d sched %6d est BSR %6d TPC %d\n",
           frame,
           slot,
           rnti,
@@ -1486,6 +1491,8 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
           ps->nrOfSymbols,
           ps->ul_dmrs_symb_pos,
           sched_pusch->mcs,
+          ps->nrOfLayers,
+          ps->num_dmrs_cdm_grps_no_data,
           sched_pusch->tb_size,
           harq_id,
           cur_harq->round,
@@ -1532,17 +1539,18 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
     pusch_pdu->qam_mod_order = sched_pusch->Qm;
     pusch_pdu->mcs_index = sched_pusch->mcs;
     pusch_pdu->mcs_table = ps->mcs_table;
-    pusch_pdu->transform_precoding = ps->transform_precoding;
+    pusch_pdu->transformPrecoder = ps->transformPrecoder;
     if (ps->pusch_Config && ps->pusch_Config->dataScramblingIdentityPUSCH)
       pusch_pdu->data_scrambling_id = *ps->pusch_Config->dataScramblingIdentityPUSCH;
     else
       pusch_pdu->data_scrambling_id = *scc->physCellId;
-    pusch_pdu->nrOfLayers = 1;
+    pusch_pdu->nrOfLayers = ps->nrOfLayers;
+    pusch_pdu->num_dmrs_cdm_grps_no_data = ps->num_dmrs_cdm_grps_no_data;
 
     /* FAPI: DMRS */
     pusch_pdu->ul_dmrs_symb_pos = ps->ul_dmrs_symb_pos;
     pusch_pdu->dmrs_config_type = ps->dmrs_config_type;
-    if (pusch_pdu->transform_precoding) { // transform precoding disabled
+    if (pusch_pdu->transformPrecoder) { // transform precoding disabled
       long *scramblingid=NULL;
       if (ps->NR_DMRS_UplinkConfig && pusch_pdu->scid == 0)
         scramblingid = ps->NR_DMRS_UplinkConfig->transformPrecodingDisabled->scramblingID0;
@@ -1562,7 +1570,7 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
     }
     pusch_pdu->scid = 0;      // DMRS sequence initialization [TS38.211, sec 6.4.1.1.1]
     pusch_pdu->num_dmrs_cdm_grps_no_data = ps->num_dmrs_cdm_grps_no_data;
-    pusch_pdu->dmrs_ports = 1;
+    pusch_pdu->dmrs_ports = ((1<<ps->nrOfLayers) - 1);
 
     /* FAPI: Pusch Allocation in frequency domain */
     pusch_pdu->resource_alloc = 1; //type 1
@@ -1588,7 +1596,7 @@ void nr_schedule_ulsch(module_id_t module_id, frame_t frame, sub_frame_t slot)
     LOG_D(NR_MAC,"PUSCH PDU : data_scrambling_identity %x, dmrs_scrambling_id %x\n",pusch_pdu->data_scrambling_id,pusch_pdu->ul_dmrs_scrambling_id);
     /* TRANSFORM PRECODING --------------------------------------------------------*/
 
-    if (pusch_pdu->transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled){
+    if (pusch_pdu->transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled){
 
       // U as specified in section 6.4.1.1.1.2 in 38.211, if sequence hopping and group hopping are disabled
       pusch_pdu->dfts_ofdm.low_papr_group_number = pusch_pdu->pusch_identity % 30;
diff --git a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
index 91221276b06..8a560580c93 100644
--- a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
+++ b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
@@ -298,6 +298,7 @@ void nr_set_pusch_semi_static(const NR_ServingCellConfigCommon_t *scc,
                               long dci_format,
                               int tda,
                               uint8_t num_dmrs_cdm_grps_no_data,
+                              uint8_t nrOfLayers,
                               NR_pusch_semi_static_t *ps);
 
 uint8_t nr_get_tpc(int target, uint8_t cqi, int incr);
diff --git a/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h b/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
index 27f42f4b1d3..66b10dad420 100644
--- a/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
+++ b/openair2/LAYER2/NR_MAC_gNB/nr_mac_gNB.h
@@ -318,13 +318,14 @@ typedef struct NR_sched_pucch {
 typedef struct NR_pusch_semi_static_t {
   int dci_format;
   int time_domain_allocation;
+  uint8_t nrOfLayers;
   uint8_t num_dmrs_cdm_grps_no_data;
 
   int startSymbolIndex;
   int nrOfSymbols;
 
   NR_PUSCH_Config_t *pusch_Config;
-  uint8_t transform_precoding;
+  uint8_t transformPrecoder;
   uint8_t mcs_table;
 
   long mapping_type;
diff --git a/openair2/RRC/NR/MESSAGES/asn1_msg.c b/openair2/RRC/NR/MESSAGES/asn1_msg.c
index ff6923429ef..52d03b5e569 100755
--- a/openair2/RRC/NR/MESSAGES/asn1_msg.c
+++ b/openair2/RRC/NR/MESSAGES/asn1_msg.c
@@ -1867,6 +1867,7 @@ int16_t do_RRCReconfiguration(
     //                                  1,
     //                                  1,
     //                                  carrier->pdsch_AntennaPorts,
+    //                                  carrier->pusch_AntennaPorts,
     //                                  carrier->initial_csi_index[ue_context_p->local_uid + 1],
     //                                  ue_context_pP->local_uid);
 
diff --git a/openair2/RRC/NR/nr_rrc_proto.h b/openair2/RRC/NR/nr_rrc_proto.h
index 9465b9b4b06..6a944504507 100644
--- a/openair2/RRC/NR/nr_rrc_proto.h
+++ b/openair2/RRC/NR/nr_rrc_proto.h
@@ -79,6 +79,7 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
                                      int scg_id,
                                      int servCellIndex,
                                      int dl_antenna_ports,
+                                     int ul_antenna_ports,
                                      int do_csirs,
                                      int initial_csi_index,
                                      int uid);
@@ -90,6 +91,7 @@ void fill_default_reconfig(NR_ServingCellConfigCommon_t *servingcellconfigcommon
                            NR_RRCReconfiguration_IEs_t *reconfig,
                            NR_CellGroupConfig_t *secondaryCellGroup,
                            int dl_antenna_ports,
+                           int ul_antenna_ports,
                            int do_csirs,
                            int initial_csi_index,
                            int uid);
diff --git a/openair2/RRC/NR/rrc_gNB_nsa.c b/openair2/RRC/NR/rrc_gNB_nsa.c
index f9fce48f412..22e43cbb6a8 100644
--- a/openair2/RRC/NR/rrc_gNB_nsa.c
+++ b/openair2/RRC/NR/rrc_gNB_nsa.c
@@ -241,6 +241,7 @@ void rrc_add_nsa_user(gNB_RRC_INST *rrc,struct rrc_gNB_ue_context_s *ue_context_
                         reconfig_ies,
                         ue_context_p->ue_context.secondaryCellGroup,
                         carrier->pdsch_AntennaPorts,
+                        carrier->pusch_AntennaPorts,
                         carrier->do_CSIRS,
                         carrier->initial_csi_index[ue_context_p->local_uid + 1],
                         ue_context_p->local_uid);
@@ -250,6 +251,7 @@ void rrc_add_nsa_user(gNB_RRC_INST *rrc,struct rrc_gNB_ue_context_s *ue_context_
                         reconfig_ies,
                         ue_context_p->ue_context.secondaryCellGroup,
                         carrier->pdsch_AntennaPorts,
+                        carrier->pusch_AntennaPorts,
                         carrier->do_CSIRS,
                         carrier->initial_csi_index[ue_context_p->local_uid + 1],
                         ue_context_p->local_uid);
@@ -348,7 +350,7 @@ void rrc_add_nsa_user(gNB_RRC_INST *rrc,struct rrc_gNB_ue_context_s *ue_context_
     rrc_mac_config_req_gNB(rrc->module_id,
                            rrc->carrier.ssb_SubcarrierOffset,
                            rrc->carrier.pdsch_AntennaPorts,
-			   rrc->carrier.pusch_AntennaPorts,
+                           rrc->carrier.pusch_AntennaPorts,
                            rrc->carrier.sib1_tda,
                            rrc->carrier.servingcellconfigcommon,
                            &rrc->carrier.mib,
diff --git a/openair2/RRC/NR/rrc_gNB_reconfig.c b/openair2/RRC/NR/rrc_gNB_reconfig.c
index 980bd39c379..d03f3615022 100644
--- a/openair2/RRC/NR/rrc_gNB_reconfig.c
+++ b/openair2/RRC/NR/rrc_gNB_reconfig.c
@@ -55,6 +55,7 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
                                      int scg_id,
                                      int servCellIndex,
                                      int dl_antenna_ports,
+                                     int ul_antenna_ports,
                                      int do_csirs,
                                      int initial_csi_index,
                                      int uid) {
@@ -745,14 +746,14 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
  pusch_Config->codebookSubset=calloc(1,sizeof(*pusch_Config->codebookSubset));
  *pusch_Config->codebookSubset = NR_PUSCH_Config__codebookSubset_nonCoherent;
  pusch_Config->maxRank=calloc(1,sizeof(*pusch_Config->maxRank));
- *pusch_Config->maxRank= 1;
+ *pusch_Config->maxRank= ul_antenna_ports;
  pusch_Config->rbg_Size=NULL;
  pusch_Config->uci_OnPUSCH=NULL;
  pusch_Config->tp_pi2BPSK=NULL;
 
  /*------------------------------TRANSFORM PRECODING- -----------------------------------------------------------------------*/
 
- uint8_t transform_precoding = NR_PUSCH_Config__transformPrecoder_disabled;
+ uint8_t transformPrecoder = NR_PUSCH_Config__transformPrecoder_disabled;
 
  // TBD: configure this from .conf file, Dedicated params cannot yet be configured in .conf file.
  // Enable this to test transform precoding enabled from dedicated config.
@@ -764,13 +765,13 @@ void fill_default_secondaryCellGroup(NR_ServingCellConfigCommon_t *servingcellco
 
  if (pusch_Config->transformPrecoder == NULL) {
   if (servingcellconfigcommon->uplinkConfigCommon->initialUplinkBWP->rach_ConfigCommon->choice.setup->msg3_transformPrecoder != NULL)
-    transform_precoding = NR_PUSCH_Config__transformPrecoder_enabled;
+    transformPrecoder = NR_PUSCH_Config__transformPrecoder_enabled;
  }
  else
-    transform_precoding = *pusch_Config->transformPrecoder;
+    transformPrecoder = *pusch_Config->transformPrecoder;
 
 
- if (transform_precoding == NR_PUSCH_Config__transformPrecoder_enabled ) {
+ if (transformPrecoder == NR_PUSCH_Config__transformPrecoder_enabled ) {
     /* Enable DMRS uplink config for transform precoding enabled */
     NR_DMRS_UplinkConfig->transformPrecodingEnabled = calloc(1,sizeof(*NR_DMRS_UplinkConfig->transformPrecodingEnabled));
     NR_DMRS_UplinkConfig->transformPrecodingEnabled->nPUSCH_Identity = NULL;
@@ -1348,6 +1349,7 @@ void fill_default_reconfig(NR_ServingCellConfigCommon_t *servingcellconfigcommon
                            NR_RRCReconfiguration_IEs_t *reconfig,
                            NR_CellGroupConfig_t *secondaryCellGroup,
                            int dl_antenna_ports,
+                           int ul_antenna_ports,
                            int do_csirs,
                            int initial_csi_index,
                            int uid) {
@@ -1363,6 +1365,7 @@ void fill_default_reconfig(NR_ServingCellConfigCommon_t *servingcellconfigcommon
                                   1,
                                   1,
                                   dl_antenna_ports,
+                                  ul_antenna_ports,
                                   do_csirs,
                                   initial_csi_index,
                                   uid);
diff --git a/targets/ARCH/rfsimulator/simulator.c b/targets/ARCH/rfsimulator/simulator.c
index 13945998d75..20faa865916 100644
--- a/targets/ARCH/rfsimulator/simulator.c
+++ b/targets/ARCH/rfsimulator/simulator.c
@@ -429,7 +429,7 @@ static int rfsimulator_write_internal(rfsimulator_state_t *t, openair0_timestamp
   if (!alreadyLocked)
     pthread_mutex_lock(&Sockmutex);
 
-  LOG_D(HW,"sending %d samples at time: %ld\n", nsamps, timestamp);
+  LOG_D(HW,"sending %d samples at time: %ld, nbAnt %d\n", nsamps, timestamp, nbAnt);
 
   for (int i=0; i<FD_SETSIZE; i++) {
     buffer_t *b=&t->buf[i];
@@ -623,7 +623,7 @@ static int rfsimulator_read(openair0_device *device, openair0_timestamp *ptimest
   }
 
   rfsimulator_state_t *t = device->priv;
-  LOG_D(HW, "Enter rfsimulator_read, expect %d samples, will release at TS: %ld\n", nsamps, t->nextTimestamp+nsamps);
+  LOG_D(HW, "Enter rfsimulator_read, expect %d samples, will release at TS: %ld, nbAnt %d\n", nsamps, t->nextTimestamp+nsamps, nbAnt);
   // deliver data from received data
   // check if a UE is connected
   int first_sock;
@@ -728,6 +728,7 @@ static int rfsimulator_read(openair0_device *device, openair0_timestamp *ptimest
 
       for (int a=0; a<nbAnt; a++) {//loop over number of Rx antennas
         if ( ptr->channel_model != NULL ) // apply a channel model
+        {
           rxAddInput( ptr->circularBuf, (struct complex16 *) samplesVoid[a],
                       a,
                       ptr->channel_model,
@@ -735,11 +736,21 @@ static int rfsimulator_read(openair0_device *device, openair0_timestamp *ptimest
                       t->nextTimestamp,
                       CirSize
                     );
-        else { // no channel modeling
+        }
+        else 
+        { // no channel modeling
+          
+          #if 0
           double H_awgn_mimo[4][4] ={{1.0, 0.5, 0.25, 0.125},//rx 0
                                      {0.5, 1.0, 0.5, 0.25},  //rx 1
                                      {0.25, 0.5, 1.0, 0.5},  //rx 2
                                      {0.125, 0.25, 0.5, 1.0}};//rx 3
+          #else
+          double H_awgn_mimo[4][4] ={{1.0, 0.0, 0.0, 0.0},//rx 0
+                                     {0.0, 1.0, 0.0, 0.0},//rx 1
+                                     {0.0, 0.0, 1.0, 0.0},//rx 2
+                                     {0.0, 0.0, 0.0, 1.0}};//rx 3
+          #endif
 
           sample_t *out=(sample_t *)samplesVoid[a];
           int nbAnt_tx = ptr->th.nbAnt;//number of Tx antennas
diff --git a/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb.sa.band78.fr1.106PRB.usrpb210.4layer.conf b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb.sa.band78.fr1.106PRB.usrpb210.4layer.conf
new file mode 100644
index 00000000000..74f2f9c11ce
--- /dev/null
+++ b/targets/PROJECTS/GENERIC-NR-5GC/CONF/gnb.sa.band78.fr1.106PRB.usrpb210.4layer.conf
@@ -0,0 +1,320 @@
+Active_gNBs = ( "gNB-Eurecom-5GNRBox");
+# Asn1_verbosity, choice in: none, info, annoying
+Asn1_verbosity = "none";
+
+gNBs =
+(
+ {
+    ////////// Identification parameters:
+    gNB_ID    =  0xe00;
+    gNB_name  =  "gNB-Eurecom-5GNRBox";
+
+    // Tracking area code, 0x0000 and 0xfffe are reserved values
+    tracking_area_code  =  1;
+    plmn_list = ({
+                  mcc = 208;
+                  mnc = 99;
+                  mnc_length = 2;
+                  snssaiList = (
+                    {
+                      sst = 1;
+                      sd  = 0x1; // 0 false, else true
+                    },
+                    {
+                      sst = 1;
+                      sd  = 0x112233; // 0 false, else true
+                    }
+                  );
+
+                  });
+
+    nr_cellid = 12345678L;
+
+    ////////// Physical parameters:
+
+    ssb_SubcarrierOffset                                      = 0;
+    pdsch_AntennaPorts                                        = 4;
+    pusch_AntennaPorts                                        = 4;
+    sib1_tda                                                  = 0;
+
+     pdcch_ConfigSIB1 = (
+      {
+        controlResourceSetZero = 12;
+        searchSpaceZero = 0;
+      }
+      );
+
+    servingCellConfigCommon = (
+    {
+ #spCellConfigCommon
+
+      physCellId                                                    = 0;
+
+#  downlinkConfigCommon
+    #frequencyInfoDL
+      # this is 3600 MHz + 43 PRBs@30kHz SCS (same as initial BWP)
+      absoluteFrequencySSB                                             = 641280;
+      dl_frequencyBand                                                 = 78;
+      # this is 3600 MHz
+      dl_absoluteFrequencyPointA                                       = 640008;
+      #scs-SpecificCarrierList
+        dl_offstToCarrier                                              = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        dl_subcarrierSpacing                                           = 1;
+        dl_carrierBandwidth                                            = 106;
+     #initialDownlinkBWP
+      #genericParameters
+        # this is RBstart=27,L=48 (275*(L-1))+RBstart
+        initialDLBWPlocationAndBandwidth                               = 28875; # 6366 12925 12956 28875 12952
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        initialDLBWPsubcarrierSpacing                                   = 1;
+      #pdcch-ConfigCommon
+        initialDLBWPcontrolResourceSetZero                              = 12;
+        initialDLBWPsearchSpaceZero                                     = 0;
+      #pdsch-ConfigCommon
+        #pdschTimeDomainAllocationList (up to 16 entries)
+        initialDLBWPk0_0                    = 0;  #for DL slot
+        initialDLBWPmappingType_0           = 0;  #0=typeA,1=typeB
+        initialDLBWPstartSymbolAndLength_0  = 40; #this is SS=1,L=13
+
+        initialDLBWPk0_1                    = 0;  #for mixed slot
+        initialDLBWPmappingType_1           = 0;
+        initialDLBWPstartSymbolAndLength_1  = 57; #this is SS=1,L=5
+
+  #uplinkConfigCommon
+     #frequencyInfoUL
+      ul_frequencyBand                                              = 78;
+      #scs-SpecificCarrierList
+      ul_offstToCarrier                                             = 0;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      ul_subcarrierSpacing                                          = 1;
+      ul_carrierBandwidth                                           = 106;
+      pMax                                                          = 20;
+     #initialUplinkBWP
+      #genericParameters
+        initialULBWPlocationAndBandwidth                            = 28875;
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+        initialULBWPsubcarrierSpacing                               = 1;
+      #rach-ConfigCommon
+        #rach-ConfigGeneric
+          prach_ConfigurationIndex                                  = 98;
+#prach_msg1_FDM
+#0 = one, 1=two, 2=four, 3=eight
+          prach_msg1_FDM                                            = 0;
+          prach_msg1_FrequencyStart                                 = 0;
+          zeroCorrelationZoneConfig                                 = 13;
+          preambleReceivedTargetPower                               = -96;
+#preamblTransMax (0...10) = (3,4,5,6,7,8,10,20,50,100,200)
+          preambleTransMax                                          = 6;
+#powerRampingStep
+# 0=dB0,1=dB2,2=dB4,3=dB6
+        powerRampingStep                                            = 1;
+#ra_ReponseWindow
+#1,2,4,8,10,20,40,80
+        ra_ResponseWindow                                           = 4;
+#ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR
+#1=oneeighth,2=onefourth,3=half,4=one,5=two,6=four,7=eight,8=sixteen
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR                = 4;
+#oneHalf (0..15) 4,8,12,16,...60,64
+        ssb_perRACH_OccasionAndCB_PreamblesPerSSB                   = 14;
+#ra_ContentionResolutionTimer
+#(0..7) 8,16,24,32,40,48,56,64
+        ra_ContentionResolutionTimer                                = 7;
+        rsrp_ThresholdSSB                                           = 19;
+#prach-RootSequenceIndex_PR
+#1 = 839, 2 = 139
+        prach_RootSequenceIndex_PR                                  = 2;
+        prach_RootSequenceIndex                                     = 1;
+        # SCS for msg1, can only be 15 for 30 kHz < 6 GHz, takes precendence over the one derived from prach-ConfigIndex
+        #
+        msg1_SubcarrierSpacing                                      = 1,
+# restrictedSetConfig
+# 0=unrestricted, 1=restricted type A, 2=restricted type B
+        restrictedSetConfig                                         = 0,
+
+      # pusch-ConfigCommon (up to 16 elements)
+        initialULBWPk2_0                      = 6;  # used for UL slot
+        initialULBWPmappingType_0             = 1
+        initialULBWPstartSymbolAndLength_0    = 41; # this is SS=0 L=13
+
+        initialULBWPk2_1                      = 6;  # used for mixed slot
+        initialULBWPmappingType_1             = 1;
+        initialULBWPstartSymbolAndLength_1    = 52; # this is SS=10 L=4
+
+        initialULBWPk2_2                      = 7;  # used for Msg.3 during RA
+        initialULBWPmappingType_2             = 1;
+        initialULBWPstartSymbolAndLength_2    = 52; # this is SS=10 L=4
+
+        msg3_DeltaPreamble                                          = 1;
+        p0_NominalWithGrant                                         =-90;
+
+# pucch-ConfigCommon setup :
+# pucchGroupHopping
+# 0 = neither, 1= group hopping, 2=sequence hopping
+        pucchGroupHopping                                           = 0;
+        hoppingId                                                   = 40;
+        p0_nominal                                                  = -90;
+# ssb_PositionsInBurs_BitmapPR
+# 1=short, 2=medium, 3=long
+      ssb_PositionsInBurst_PR                                       = 2;
+      ssb_PositionsInBurst_Bitmap                                   = 1;
+
+# ssb_periodicityServingCell
+# 0 = ms5, 1=ms10, 2=ms20, 3=ms40, 4=ms80, 5=ms160, 6=spare2, 7=spare1
+      ssb_periodicityServingCell                                    = 2;
+
+# dmrs_TypeA_position
+# 0 = pos2, 1 = pos3
+      dmrs_TypeA_Position                                           = 0;
+
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      subcarrierSpacing                                             = 1;
+
+
+  #tdd-UL-DL-ConfigurationCommon
+# subcarrierSpacing
+# 0=kHz15, 1=kHz30, 2=kHz60, 3=kHz120
+      referenceSubcarrierSpacing                                    = 1;
+      # pattern1
+      # dl_UL_TransmissionPeriodicity
+      # 0=ms0p5, 1=ms0p625, 2=ms1, 3=ms1p25, 4=ms2, 5=ms2p5, 6=ms5, 7=ms10
+      dl_UL_TransmissionPeriodicity                                 = 6;
+      nrofDownlinkSlots                                             = 7;
+      nrofDownlinkSymbols                                           = 6;
+      nrofUplinkSlots                                               = 2;
+      nrofUplinkSymbols                                             = 4;
+
+      ssPBCH_BlockPower                                             = -25;
+  }
+
+  );
+
+
+    # ------- SCTP definitions
+    SCTP :
+    {
+        # Number of streams to use in input/output
+        SCTP_INSTREAMS  = 2;
+        SCTP_OUTSTREAMS = 2;
+    };
+
+
+    ////////// MME parameters:
+    amf_ip_address      = ( { ipv4       = "192.168.70.132";
+                              ipv6       = "192:168:30::17";
+                              active     = "yes";
+                              preference = "ipv4";
+                            }
+                          );
+
+
+    NETWORK_INTERFACES :
+    {
+        GNB_INTERFACE_NAME_FOR_NG_AMF            = "demo-oai";
+        GNB_IPV4_ADDRESS_FOR_NG_AMF              = "192.168.70.129/24";
+        GNB_INTERFACE_NAME_FOR_NGU               = "demo-oai";
+        GNB_IPV4_ADDRESS_FOR_NGU                 = "192.168.70.129/24";
+        GNB_PORT_FOR_S1U                         = 2152; # Spec 2152
+    };
+
+  }
+);
+
+MACRLCs = (
+    {
+        num_cc                      = 1;
+        tr_s_preference             = "local_L1";
+        tr_n_preference             = "local_RRC";
+        pusch_TargetSNRx10          = 150;
+        pucch_TargetSNRx10          = 200;
+    }
+);
+
+L1s = (
+    {
+	num_cc = 1;
+	tr_n_preference = "local_mac";
+	pusch_proc_threads = 8;
+	prach_dtx_threshold = 120;
+    pucch0_dtx_threshold = 150;
+    ofdm_offset_divisor = 8; #set this to UINT_MAX for offset 0
+    }
+);
+
+RUs = (
+    {
+       local_rf       = "yes"
+         nb_tx          = 4
+         nb_rx          = 4
+         att_tx         = 0
+         att_rx         = 0;
+         bands          = [78];
+         max_pdschReferenceSignalPower = -27;
+         max_rxgain                    = 114;
+         eNB_instances  = [0];
+         #beamforming 1x4 matrix:
+         bf_weights = [0x00007fff, 0x0000, 0x0000, 0x0000, 0x00000000, 0x00007fff, 0x0000, 0x0000, 0x0000, 0x0000, 0x00007fff, 0x0000, 0x0000, 0x0000, 0x0000, 0x00007fff];
+         clock_src = "internal";
+    }
+);
+
+THREAD_STRUCT = (
+  {
+    #three config for level of parallelism "PARALLEL_SINGLE_THREAD", "PARALLEL_RU_L1_SPLIT", or "PARALLEL_RU_L1_TRX_SPLIT"
+    parallel_config    = "PARALLEL_SINGLE_THREAD";
+    #two option for worker "WORKER_DISABLE" or "WORKER_ENABLE"
+    worker_config      = "WORKER_ENABLE";
+  }
+);
+
+rfsimulator :
+{
+    serveraddr = "server";
+    serverport = "4043";
+    options = (); #("saviq"); or/and "chanmod"
+    modelname = "AWGN";
+    IQfile = "/tmp/rfsimulator.iqs";
+};
+
+security = {
+  # preferred ciphering algorithms
+  # the first one of the list that an UE supports in chosen
+  # valid values: nea0, nea1, nea2, nea3
+  ciphering_algorithms = ( "nea0" );
+
+  # preferred integrity algorithms
+  # the first one of the list that an UE supports in chosen
+  # valid values: nia0, nia1, nia2, nia3
+  integrity_algorithms = ( "nia2", "nia0" );
+
+  # setting 'drb_ciphering' to "no" disables ciphering for DRBs, no matter
+  # what 'ciphering_algorithms' configures; same thing for 'drb_integrity'
+  drb_ciphering = "yes";
+  drb_integrity = "no";
+};
+
+     log_config :
+     {
+       global_log_level                      ="info";
+       global_log_verbosity                  ="medium";
+       hw_log_level                          ="info";
+       hw_log_verbosity                      ="medium";
+       phy_log_level                         ="info";
+       phy_log_verbosity                     ="medium";
+       mac_log_level                         ="info";
+       mac_log_verbosity                     ="high";
+       rlc_log_level                         ="info";
+       rlc_log_verbosity                     ="medium";
+       pdcp_log_level                        ="info";
+       pdcp_log_verbosity                    ="medium";
+       rrc_log_level                         ="info";
+       rrc_log_verbosity                     ="medium";
+       ngap_log_level                         ="info";
+       ngap_log_verbosity                     ="medium";
+    };
+
-- 
GitLab