diff --git a/openair1/PHY/INIT/lte_init.c b/openair1/PHY/INIT/lte_init.c
index 9fb665b2702aba45396695522ec90b58af218b78..bdf35cccf99f4ad5d52c7929dce363290777d3ad 100644
--- a/openair1/PHY/INIT/lte_init.c
+++ b/openair1/PHY/INIT/lte_init.c
@@ -882,6 +882,7 @@ void phy_init_lte_ue__PDSCH( LTE_UE_PDSCH* const pdsch, const LTE_DL_FRAME_PARMS
   pdsch->pmi_ext = (uint8_t*)malloc16_clear( frame_parms->N_RB_DL );
   pdsch->llr[0] = (int16_t*)malloc16_clear( (8*((3*8*6144)+12))*sizeof(int16_t) );
   pdsch->llr128 = (int16_t**)malloc16_clear( sizeof(int16_t*) );
+  pdsch->llr128_2ndstream = (int16_t**)malloc16_clear( sizeof(int16_t*) );
   // FIXME! no further allocation for (int16_t*)pdsch->llr128 !!! expect SIGSEGV
 
   pdsch->rxdataF_ext         = (int32_t**)malloc16_clear( 8*sizeof(int32_t*) );
@@ -889,6 +890,7 @@ void phy_init_lte_ue__PDSCH( LTE_UE_PDSCH* const pdsch, const LTE_DL_FRAME_PARMS
   pdsch->rho                 = (int32_t**)malloc16_clear( frame_parms->nb_antennas_rx*sizeof(int32_t*) );
   pdsch->dl_ch_estimates_ext = (int32_t**)malloc16_clear( 8*sizeof(int32_t*) );
   pdsch->dl_ch_rho_ext       = (int32_t**)malloc16_clear( 8*sizeof(int32_t*) );
+  pdsch->dl_ch_rho2_ext       = (int32_t**)malloc16_clear( 8*sizeof(int32_t*) );
   pdsch->dl_ch_mag0          = (int32_t**)malloc16_clear( 8*sizeof(int32_t*) );
   pdsch->dl_ch_magb0         = (int32_t**)malloc16_clear( 8*sizeof(int32_t*) );
 
@@ -904,6 +906,7 @@ void phy_init_lte_ue__PDSCH( LTE_UE_PDSCH* const pdsch, const LTE_DL_FRAME_PARMS
       pdsch->rxdataF_comp0[idx]       = (int32_t*)malloc16_clear( sizeof(int32_t) * num );
       pdsch->dl_ch_estimates_ext[idx] = (int32_t*)malloc16_clear( sizeof(int32_t) * num );
       pdsch->dl_ch_rho_ext[idx]       = (int32_t*)malloc16_clear( sizeof(int32_t) * num );
+      pdsch->dl_ch_rho2_ext[idx]       = (int32_t*)malloc16_clear( sizeof(int32_t) * num );
       pdsch->dl_ch_mag0[idx]          = (int32_t*)malloc16_clear( sizeof(int32_t) * num );
       pdsch->dl_ch_magb0[idx]         = (int32_t*)malloc16_clear( sizeof(int32_t) * num );
     }
diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
index 2dd9318cc9a6044f7bdd6df76e765fd2fadd7a3d..74db26751be32ed7690edded8444c461fbd49648 100644
--- a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
@@ -384,22 +384,29 @@ int rx_pdsch(PHY_VARS_UE *phy_vars_ue,
 				     dlsch0_harq->round,
 				     nb_rb, 
 				     lte_ue_pdsch_vars[eNB_id]->log2_maxh); 
-      // compute correlation between signal and interference channels
+      // compute correlation between signal and interference channels (rho12 and rho21)
       dlsch_dual_stream_correlation(frame_parms,
 				    symbol,
 				    nb_rb,
 				    lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext,
-				    NULL,
+				    &(lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext[2]),
 				    lte_ue_pdsch_vars[eNB_id]->dl_ch_rho_ext,
 				    lte_ue_pdsch_vars[eNB_id]->log2_maxh);
+      dlsch_dual_stream_correlation(frame_parms,
+				    symbol,
+				    nb_rb,
+				    &(lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext[2]),
+				    lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+				    lte_ue_pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+				    lte_ue_pdsch_vars[eNB_id]->log2_maxh);
       //printf("TM3 log2_maxh : %d\n",lte_ue_pdsch_vars[eNB_id]->log2_maxh);
 
     }
     else {
-
+      LOG_E(PHY, "only 2 tx antennas supported for TM3\n");
     }
   }
-  else if (dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) {// single-layer precoding, TM4 (Single-codeword)/5 (single or 2 user)/6
+  else if (dlsch0_harq->mimo_mode<DUALSTREAM_UNIFORM_PRECODING1) {// single-layer precoding (TM5, TM6), potentially TM4 (Single-codeword)
     //    printf("Channel compensation for precoding\n");
     //    if ((dual_stream_flag==1) && (eNB_id_i==NUMBER_OF_CONNECTED_eNB_MAX)) {
     if ((dual_stream_flag==1) && (eNB_id_i==phy_vars_ue->n_connected_eNB)) {  // TM5 two-user
@@ -484,7 +491,13 @@ int rx_pdsch(PHY_VARS_UE *phy_vars_ue,
       }
 #endif  
 
-      dlsch_dual_stream_correlation(frame_parms, symbol, nb_rb, lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext, lte_ue_pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, lte_ue_pdsch_vars[eNB_id]->dl_ch_rho_ext, lte_ue_pdsch_vars[eNB_id]->log2_maxh);
+      dlsch_dual_stream_correlation(frame_parms, 
+				    symbol, 
+				    nb_rb, 
+				    lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext, 
+				    lte_ue_pdsch_vars[eNB_id_i]->dl_ch_estimates_ext, 
+				    lte_ue_pdsch_vars[eNB_id]->dl_ch_rho_ext, 
+				    lte_ue_pdsch_vars[eNB_id]->log2_maxh);
 
     }
     else {
@@ -634,6 +647,14 @@ int rx_pdsch(PHY_VARS_UE *phy_vars_ue,
 			    symbol,first_symbol_flag,nb_rb,
 			    adjust_G2(frame_parms,dlsch0_harq->rb_alloc,2,subframe,symbol),
 			    lte_ue_pdsch_vars[eNB_id]->llr128);
+	dlsch_qpsk_qpsk_llr(frame_parms,
+			    lte_ue_pdsch_vars[eNB_id]->rxdataF_comp1[dlsch0_harq->round],
+			    lte_ue_pdsch_vars[eNB_id]->rxdataF_comp0,
+			    lte_ue_pdsch_vars[eNB_id]->dl_ch_rho2_ext,
+			    lte_ue_pdsch_vars[eNB_id]->llr[1],
+			    symbol,first_symbol_flag,nb_rb,
+			    adjust_G2(frame_parms,dlsch1_harq->rb_alloc,2,subframe,symbol),
+			    lte_ue_pdsch_vars[eNB_id]->llr128_2ndstream);
       }
       else if (get_Qm(dlsch1_harq->mcs) == 4) { 
 	dlsch_qpsk_16qam_llr(frame_parms,
@@ -1364,7 +1385,7 @@ void dlsch_channel_compensation_TM3(LTE_DL_FRAME_PARMS *frame_parms,
   int **dl_ch_magb0           = lte_ue_pdsch_vars->dl_ch_magb0;
   int **dl_ch_magb1           = lte_ue_pdsch_vars->dl_ch_magb1;
   int **rxdataF_comp0         = lte_ue_pdsch_vars->rxdataF_comp0;
-  int **rxdataF_comp1         = lte_ue_pdsch_vars->rxdataF_comp1[0];
+  int **rxdataF_comp1         = lte_ue_pdsch_vars->rxdataF_comp1[round]; //?
   __m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp0_128,QAM_amp0_128b,QAM_amp1_128,QAM_amp1_128b;   
     
 
diff --git a/openair1/PHY/impl_defs_lte.h b/openair1/PHY/impl_defs_lte.h
index 44e0303f320913038e8120ca992aa7d284acc72d..8b657fac2f1281d0bb27488d39ff8b0aa9e366fc 100644
--- a/openair1/PHY/impl_defs_lte.h
+++ b/openair1/PHY/impl_defs_lte.h
@@ -577,15 +577,20 @@ typedef struct {
 } LTE_DL_FRAME_PARMS;
 
 typedef enum {
+  /// TM1
   SISO=0,
+  /// TM2
   ALAMOUTI=1,
+  /// TM3
   LARGE_CDD=2,
+  /// the next 6 entries are single-layer precoding modes (TM5, TM6, potentially single-codeword TM4)
   UNIFORM_PRECODING11=3,
   UNIFORM_PRECODING1m1=4,
   UNIFORM_PRECODING1j=5,
   UNIFORM_PRECODING1mj=6,
   PUSCH_PRECODING0=7,
   PUSCH_PRECODING1=8,
+  /// the next 3 entries are for TM4
   DUALSTREAM_UNIFORM_PRECODING1=9,
   DUALSTREAM_UNIFORM_PRECODINGj=10,
   DUALSTREAM_PUSCH_PRECODING=11,
@@ -791,8 +796,8 @@ typedef struct {
   /// - second index: ? [0..168*N_RB_DL[
   int32_t **rxdataF_comp0;
   /// \brief Received frequency-domain signal after extraction and channel compensation.
-  /// - first index: ? [0..7] (hard coded) FIXME! accessed via \c nb_antennas_rx
-  /// - second index: ? [0..7] (hard coded)
+  /// - first index: ? [0..7] (hard coded) accessed via \c round
+  /// - second index: ? [0..7] (hard coded) FIXME! accessed via \c nb_antennas_rx
   /// - third index: ? [0..168*N_RB_DL[
   int32_t **rxdataF_comp1[8];
   /// \brief Downlink channel estimates extracted in PRBS.
@@ -803,6 +808,10 @@ typedef struct {
   /// - first index: ? [0..7] (hard coded) FIXME! accessed via \c nb_antennas_rx
   /// - second index: ? [0..168*N_RB_DL[
   int32_t **dl_ch_rho_ext;
+  /// \brief Downlink cross-correlation of MIMO channel estimates in the reverse direction (rho21: second-stream channel against first-stream channel) extracted in PRBS.
+  /// - first index: ? [0..7] (hard coded) FIXME! accessed via \c nb_antennas_rx
+  /// - second index: ? [0..168*N_RB_DL[
+  int32_t **dl_ch_rho2_ext;
   /// \brief Downlink PMIs extracted in PRBS and grouped in subbands.
   /// - first index: ressource block [0..N_RB_DL[
   uint8_t *pmi_ext;
@@ -844,6 +853,10 @@ typedef struct {
   /// - first index: ? [0..0] (hard coded)
   /// - second index: ? [0..]
   int16_t **llr128;
+  /// \brief Pointers to llr vectors of the second stream (128-bit alignment).
+  /// - first index: ? [0..0] (hard coded)
+  /// - second index: ? [0..]
+  int16_t **llr128_2ndstream;
   //uint32_t *rb_alloc;
   //uint8_t Qm[2];
   //MIMO_mode_t mimo_mode;
diff --git a/openair1/SIMULATION/LTE_PHY/dlsim.c b/openair1/SIMULATION/LTE_PHY/dlsim.c
index a2537ea67c76914d021cfaf0029ff7410487a3a9..63fd6a23f11d45a5e16c51d84a8e510543d65f87 100644
--- a/openair1/SIMULATION/LTE_PHY/dlsim.c
+++ b/openair1/SIMULATION/LTE_PHY/dlsim.c
@@ -178,7 +178,7 @@ void lte_param_init(unsigned char N_tx, unsigned char N_rx,unsigned char transmi
     PHY_vars_UE->pdsch_config_dedicated->p_a  = dB0; // 4 = 0dB
     ((PHY_vars_UE->lte_frame_parms).pdsch_config_common).p_b = 0; 
   }
-  else {(lte_frame_parms->nb_antennas_tx_eNB>1) ? 1 : 0; // rho_a = rhob
+  else {// rho_a = rhob
     PHY_vars_eNB->pdsch_config_dedicated->p_a  = dB0; // 4 = 0dB
     ((PHY_vars_eNB->lte_frame_parms).pdsch_config_common).p_b = 1;
     PHY_vars_UE->pdsch_config_dedicated->p_a  = dB0; // 4 = 0dB
@@ -268,7 +268,7 @@ int main(int argc, char **argv) {
   unsigned int ret;
   unsigned int coded_bits_per_codeword=0,nsymb,dci_cnt,tbs=0;
  
-  unsigned int tx_lev=0,tx_lev_dB=0,trials,errs[4]={0,0,0,0},round_trials[4]={0,0,0,0},dci_errors=0,dlsch_active=0,num_layers;
+  unsigned int tx_lev=0,tx_lev_dB=0,trials,errs[4]={0,0,0,0},errs2[4]={0,0,0,0},round_trials[4]={0,0,0,0},dci_errors=0,dlsch_active=0,num_layers;
   int re_allocated;
   char fname[32],vname[32];
   FILE *bler_fd;
@@ -348,7 +348,7 @@ int main(int argc, char **argv) {
 
   LTE_DL_UE_HARQ_t *dlsch0_ue_harq;
   LTE_DL_eNB_HARQ_t *dlsch0_eNB_harq;
-
+  uint8_t Kmimo;
 
 
   opp_enabled=1; // to enable the time meas
@@ -891,31 +891,34 @@ int main(int argc, char **argv) {
     exit(-1);
   }
 
+  if ((transmission_mode == 3) || (transmission_mode==4)) 
+    Kmimo=2;
+  else
+    Kmimo=1;
+
   for (k=0;k<n_users;k++) {
     // Create transport channel structures for 2 transport blocks (MIMO)
     for (i=0;i<2;i++) {
-      PHY_vars_eNB->dlsch_eNB[k][i] = new_eNB_dlsch(1,8,N_RB_DL,0);
-      
+      PHY_vars_eNB->dlsch_eNB[k][i] = new_eNB_dlsch(Kmimo,8,N_RB_DL,0);
       if (!PHY_vars_eNB->dlsch_eNB[k][i]) {
 	printf("Can't get eNB dlsch structures\n");
 	exit(-1);
       }
-      
       PHY_vars_eNB->dlsch_eNB[k][i]->rnti = n_rnti+k;
     }
   }
-  // structure for SIC at UE
-  PHY_vars_UE->dlsch_eNB[0] = new_eNB_dlsch(1,8,N_RB_DL,0);
-
+  
   for (i=0;i<2;i++) {
-    PHY_vars_UE->dlsch_ue[0][i]  = new_ue_dlsch(1,8,MAX_TURBO_ITERATIONS,N_RB_DL,0);
+    PHY_vars_UE->dlsch_ue[0][i]  = new_ue_dlsch(Kmimo,8,MAX_TURBO_ITERATIONS,N_RB_DL,0);
     if (!PHY_vars_UE->dlsch_ue[0][i]) {
       printf("Can't get ue dlsch structures\n");
       exit(-1);
     }    
     PHY_vars_UE->dlsch_ue[0][i]->rnti   = n_rnti;
   }
-  
+  // structure for SIC at UE
+  PHY_vars_UE->dlsch_eNB[0] = new_eNB_dlsch(Kmimo,8,N_RB_DL,0);
+
   if (DLSCH_alloc_pdu2_1E[0].tpmi == 5) {
 
     PHY_vars_eNB->eNB_UE_stats[0].DL_pmi_single = (unsigned short)(taus()&0xffff);
@@ -1919,6 +1922,10 @@ int main(int argc, char **argv) {
       errs[1]=0;
       errs[2]=0;
       errs[3]=0;
+      errs2[0]=0;
+      errs2[1]=0;
+      errs2[2]=0;
+      errs2[3]=0;
       round_trials[0] = 0;
       round_trials[1] = 0;
       round_trials[2] = 0;
@@ -2373,25 +2380,25 @@ int main(int argc, char **argv) {
 	    }
 
 	    for (k=0;k<n_users;k++) {
-
+	      for (int cw=0; cw<Kmimo; cw++) {
 	      coded_bits_per_codeword = get_G(&PHY_vars_eNB->lte_frame_parms,
-					      PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->nb_rb,
-					      PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->rb_alloc,
-					      get_Qm(PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->mcs),
-					      PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->Nl,
+					      PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->nb_rb,
+					      PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->rb_alloc,
+					      get_Qm(PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->mcs),
+					      PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->Nl,
 					      num_pdcch_symbols,
 					      0,subframe);
 
 #ifdef TBS_FIX   // This is for MESH operation!!!
-	      tbs = (double)3*TBStable[get_I_TBS(PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->mcs)][PHY_vars_eNB->dlsch_eNB[k][0]->nb_rb-1]/4;
+	      tbs = (double)3*TBStable[get_I_TBS(PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->mcs)][PHY_vars_eNB->dlsch_eNB[k][cw]->nb_rb-1]/4;
 #else
-	      tbs = PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->TBS;
+	      tbs = PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->TBS;
 #endif
 	      rate = (double)tbs/(double)coded_bits_per_codeword;
 	      
 	      if ((SNR==snr0) && (trials==0) && (round==0))
-		printf("User %d: Rate = %f (%f bits/dim) (G %d, TBS %d, mod %d, pdcch_sym %d, ndi %d)\n",
-		       k,rate,rate*get_Qm(PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->mcs),
+		printf("User %d, cw %d: Rate = %f (%f bits/dim) (G %d, TBS %d, mod %d, pdcch_sym %d, ndi %d)\n",
+		       k,cw,rate,rate*get_Qm(PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->mcs),
 		       coded_bits_per_codeword,
 		       tbs,
 		       get_Qm(PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->mcs),
@@ -2415,16 +2422,17 @@ int main(int argc, char **argv) {
 
 
 	      start_meas(&PHY_vars_eNB->dlsch_encoding_stats);	      
-	      if (dlsch_encoding(input_buffer0[k],
+	      if (dlsch_encoding(((cw==0) ? input_buffer0[k] : input_buffer1[k]),
 				 &PHY_vars_eNB->lte_frame_parms,
 				 num_pdcch_symbols,
-				 PHY_vars_eNB->dlsch_eNB[k][0],
+				 PHY_vars_eNB->dlsch_eNB[k][cw],
 				 0,subframe,
 				 &PHY_vars_eNB->dlsch_rate_matching_stats,
 				 &PHY_vars_eNB->dlsch_turbo_encoding_stats,
 				 &PHY_vars_eNB->dlsch_interleaving_stats
 				 )<0)
 		exit(-1);
+	      /*
 	      if (transmission_mode == 3) {
 		if (dlsch_encoding(input_buffer1[k],
 				   &PHY_vars_eNB->lte_frame_parms,
@@ -2437,30 +2445,33 @@ int main(int argc, char **argv) {
 				   )<0)
 		  exit(-1);
 	      }
+	      */
 	      stop_meas(&PHY_vars_eNB->dlsch_encoding_stats);  
 
-	      PHY_vars_eNB->dlsch_eNB[k][0]->rnti = (common_flag==0) ? n_rnti+k : SI_RNTI;	  
+	      PHY_vars_eNB->dlsch_eNB[k][cw]->rnti = (common_flag==0) ? n_rnti+k : SI_RNTI;	  
 	      start_meas(&PHY_vars_eNB->dlsch_scrambling_stats);	      
 	      dlsch_scrambling(&PHY_vars_eNB->lte_frame_parms,
 			       0,
-			       PHY_vars_eNB->dlsch_eNB[k][0],
+			       PHY_vars_eNB->dlsch_eNB[k][cw],
 			       coded_bits_per_codeword,
 			       0,
 			       subframe<<1);
 	      stop_meas(&PHY_vars_eNB->dlsch_scrambling_stats);	      
+
 	      if (n_frames==1) {
-		for (s=0;s<PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->C;s++) {
-		  if (s<PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->Cminus)
-		    Kr = PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->Kminus;
+		for (s=0;s<PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->C;s++) {
+		  if (s<PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->Cminus)
+		    Kr = PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->Kminus;
 		  else
-		    Kr = PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->Kplus;
+		    Kr = PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->Kplus;
 	      
 		  Kr_bytes = Kr>>3;
 	      
 		  for (i=0;i<Kr_bytes;i++)
-		    printf("%d : (%x)\n",i,PHY_vars_eNB->dlsch_eNB[k][0]->harq_processes[0]->c[s][i]);
+		    printf("%d : (%x)\n",i,PHY_vars_eNB->dlsch_eNB[k][cw]->harq_processes[0]->c[s][i]);
 		}
 	      }
+	      }
 	  
 	      start_meas(&PHY_vars_eNB->dlsch_modulation_stats);	      
 	      re_allocated = dlsch_modulation(PHY_vars_eNB->lte_eNB_common_vars.txdataF[eNB_id],
@@ -3094,17 +3105,20 @@ PHY_vars_UE->lte_ue_pdcch_vars[0]->num_pdcch_symbols,
 	    }
 	  }
 
-	  PHY_vars_UE->dlsch_ue[0][0]->rnti = (common_flag==0) ? n_rnti: SI_RNTI;
+	  for (int cw=Kmimo-1;cw>=0;cw--){
+	  PHY_vars_UE->dlsch_ue[0][cw]->rnti = (common_flag==0) ? n_rnti: SI_RNTI;
 	  coded_bits_per_codeword = get_G(&PHY_vars_eNB->lte_frame_parms,
-					  PHY_vars_eNB->dlsch_eNB[0][0]->harq_processes[0]->nb_rb,
-					  PHY_vars_eNB->dlsch_eNB[0][0]->harq_processes[0]->rb_alloc,
-					  get_Qm(PHY_vars_eNB->dlsch_eNB[0][0]->harq_processes[0]->mcs),
-					  PHY_vars_eNB->dlsch_eNB[0][0]->harq_processes[0]->Nl,
+					  PHY_vars_eNB->dlsch_eNB[0][cw]->harq_processes[0]->nb_rb,
+					  PHY_vars_eNB->dlsch_eNB[0][cw]->harq_processes[0]->rb_alloc,
+					  get_Qm(PHY_vars_eNB->dlsch_eNB[0][cw]->harq_processes[0]->mcs),
+					  PHY_vars_eNB->dlsch_eNB[0][cw]->harq_processes[0]->Nl,
 					  num_pdcch_symbols,
 					  0,subframe);
 
-	  PHY_vars_UE->dlsch_ue[0][0]->harq_processes[PHY_vars_UE->dlsch_ue[0][0]->current_harq_pid]->G = coded_bits_per_codeword;
+	  PHY_vars_UE->dlsch_ue[0][cw]->harq_processes[PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid]->G = coded_bits_per_codeword;
+	  
 
+	  /*
 	  // calculate uncoded BLER
 	  uncoded_ber=0;
 	  for (i=0;i<coded_bits_per_codeword;i++) 
@@ -3120,31 +3134,42 @@ PHY_vars_UE->lte_ue_pdcch_vars[0]->num_pdcch_symbols,
 	  
 	  if (n_frames==1)
 	    write_output("uncoded_ber_bit.m","uncoded_ber_bit",uncoded_ber_bit,coded_bits_per_codeword,1,0);
-	  
+	  */
 
 	  start_meas(&PHY_vars_UE->dlsch_unscrambling_stats);	      
 	  dlsch_unscrambling(&PHY_vars_UE->lte_frame_parms,
 			     0,
-			     PHY_vars_UE->dlsch_ue[0][0],
+			     PHY_vars_UE->dlsch_ue[0][cw],
 			     coded_bits_per_codeword,
-			     PHY_vars_UE->lte_ue_pdsch_vars[eNB_id]->llr[0],
+			     PHY_vars_UE->lte_ue_pdsch_vars[eNB_id]->llr[cw],
 			     0,
 			     subframe<<1);
 	  stop_meas(&PHY_vars_UE->dlsch_unscrambling_stats);	      
 
 	  start_meas(&PHY_vars_UE->dlsch_decoding_stats);
 	  ret = dlsch_decoding(PHY_vars_UE,
-			       PHY_vars_UE->lte_ue_pdsch_vars[eNB_id]->llr[0],		 
+			       PHY_vars_UE->lte_ue_pdsch_vars[eNB_id]->llr[cw],		 
 			       &PHY_vars_UE->lte_frame_parms,
-			       PHY_vars_UE->dlsch_ue[0][0],
-			       PHY_vars_UE->dlsch_ue[0][0]->harq_processes[PHY_vars_UE->dlsch_ue[0][0]->current_harq_pid],
+			       PHY_vars_UE->dlsch_ue[0][cw],
+			       PHY_vars_UE->dlsch_ue[0][cw]->harq_processes[PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid],
 			       subframe,
-			       PHY_vars_UE->dlsch_ue[0][0]->current_harq_pid,
+			       PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid,
 			       1,llr8_flag);
 	  stop_meas(&PHY_vars_UE->dlsch_decoding_stats); 
-	  
+
+	  if (cw==1) {
+	    if (ret <= PHY_vars_UE->dlsch_ue[0][cw]->max_turbo_iterations) {
+	    }
+	    else {
+	      errs2[round]++;
+	    }
+	  }
+	  }
+
+
 	  stop_meas(&PHY_vars_UE->phy_proc_rx);
 
+
 	  if (ret <= PHY_vars_UE->dlsch_ue[0][0]->max_turbo_iterations) {
 		
 	    avg_iter += ret;
@@ -3154,9 +3179,10 @@ PHY_vars_UE->lte_ue_pdcch_vars[0]->num_pdcch_symbols,
 
 	    PHY_vars_UE->total_TBS[eNB_id] =  PHY_vars_UE->total_TBS[eNB_id] + PHY_vars_UE->dlsch_ue[eNB_id][0]->harq_processes[PHY_vars_UE->dlsch_ue[eNB_id][0]->current_harq_pid]->TBS;
 	    TB0_active = 0;
-	    if (PHY_vars_UE->dlsch_ue[eNB_id][0]->harq_processes[PHY_vars_UE->dlsch_ue[eNB_id][0]->current_harq_pid]->mimo_mode == LARGE_CDD) {   //try to decode second stream
-	      for (round = 0 ; round < PHY_vars_UE->dlsch_ue[eNB_id][0]->harq_processes[PHY_vars_UE->dlsch_ue[eNB_id][0]->current_harq_pid]->round ; round++) {
-
+	    if (PHY_vars_UE->dlsch_ue[eNB_id][0]->harq_processes[PHY_vars_UE->dlsch_ue[eNB_id][0]->current_harq_pid]->mimo_mode == LARGE_CDD) {   //try to decode second stream using SIC
+	      /*
+		for (round = 0 ; round < PHY_vars_UE->dlsch_ue[eNB_id][0]->harq_processes[PHY_vars_UE->dlsch_ue[eNB_id][0]->current_harq_pid]->round ; round++) {
+		// re-encoding of first stream
 		dlsch0_ue_harq = PHY_vars_UE->dlsch_ue[eNB_id][0]->harq_processes[PHY_vars_UE->dlsch_ue[eNB_id][0]->current_harq_pid];
 		dlsch0_eNB_harq = PHY_vars_UE->dlsch_eNB[eNB_id]->harq_processes[PHY_vars_UE->dlsch_ue[eNB_id][0]->current_harq_pid];
 		dlsch0_eNB_harq->mimo_mode    = LARGE_CDD;
@@ -3181,8 +3207,17 @@ PHY_vars_UE->lte_ue_pdcch_vars[0]->num_pdcch_symbols,
 			       &PHY_vars_UE->dlsch_rate_matching_stats,
 			       &PHY_vars_UE->dlsch_turbo_encoding_stats,
 			       &PHY_vars_UE->dlsch_interleaving_stats
-			       );		
+			       );	
+
+			       //scrambling
+
+		//modulation
+
+		//stripping (from matched filter output?)
+
+		//detection of second stream	
 	      }
+	      */
 	    }
 	  }	
 	  else {
@@ -3416,8 +3451,9 @@ PHY_vars_UE->lte_ue_pdcch_vars[0]->num_pdcch_symbols,
 	     (double)tx_lev_dB+10*log10(PHY_vars_UE->lte_frame_parms.ofdm_symbol_size/(NB_RB*12)),
 	     sigma2_dB);
       
-      printf("Errors (%d/%d %d/%d %d/%d %d/%d), Pe = (%e,%e,%e,%e), dci_errors %d/%d, Pe = %e => effective rate %f  (%2.1f%%,%f, %f), normalized delay %f (%f)\n",
+      printf("Errors (%d(%d)/%d %d/%d %d/%d %d/%d), Pe = (%e,%e,%e,%e), dci_errors %d/%d, Pe = %e => effective rate %f  (%2.1f%%,%f, %f), normalized delay %f (%f)\n",
 	     errs[0],
+	     errs2[0],
 	     round_trials[0],
 	     errs[1],
 	     round_trials[0],