diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
index d4983e262fc881e6df3d450f4dd304efeb423e7f..d4eb4be68d6b4edac9849f621aa7c52226fab224 100644
--- a/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
+++ b/openair1/PHY/LTE_TRANSPORT/dlsch_demodulation.c
@@ -468,7 +468,7 @@ int rx_pdsch(PHY_VARS_UE *phy_vars_ue,
                                     symbol,
                                     nb_rb,
                                     lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext,
-                                    &(lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext[2]),
+                                    0,
                                     lte_ue_pdsch_vars[eNB_id]->dl_ch_rho_ext[harq_pid][round],
 			   	    lte_ue_pdsch_vars[eNB_id]->log2_maxh0);
 	
@@ -476,14 +476,14 @@ int rx_pdsch(PHY_VARS_UE *phy_vars_ue,
 
       //to be optimized (just take complex conjugate)
 
-      dlsch_dual_stream_correlation(frame_parms,
+      dlsch_dual_stream_correlation_opposite(frame_parms,
                                     symbol,
                                     nb_rb,
-                                    &(lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext[2]),
                                     lte_ue_pdsch_vars[eNB_id]->dl_ch_estimates_ext,
+				    0,
                                     lte_ue_pdsch_vars[eNB_id]->dl_ch_rho2_ext,
 				    lte_ue_pdsch_vars[eNB_id]->log2_maxh1);
-     // printf("rho stream2 =%d\n",&lte_ue_pdsch_vars[eNB_id]->dl_ch_rho2_ext );
+    //  printf("rho stream2 =%d\n",&lte_ue_pdsch_vars[eNB_id]->dl_ch_rho2_ext );
       //printf("TM3 log2_maxh : %d\n",lte_ue_pdsch_vars[eNB_id]->log2_maxh);
 
    }
@@ -2567,7 +2567,7 @@ void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms,
   //  printf("Dual stream correlation (%p)\n",dl_ch_estimates_ext_i);
 
   for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
-
+ //printf ("antenna %d", aarx);
     dl_ch128          = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
 
     if (dl_ch_estimates_ext_i == NULL) // TM3/4
@@ -2598,7 +2598,7 @@ void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms,
       //      print_ints("c0",&mmtmpD2);
       //      print_ints("c1",&mmtmpD3);
       dl_ch_rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
-
+     //print_shorts("rho 0:",dl_ch_rho128);
       // multiply by conjugated channel
       mmtmpD0 = _mm_madd_epi16(dl_ch128[1],dl_ch128i[1]);
       // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
@@ -2629,7 +2629,118 @@ void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms,
         mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
         mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
         dl_ch_rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+	
+       dl_ch128+=3;
+        dl_ch128i+=3;
+        dl_ch_rho128+=3;
+      } else {
+
+        dl_ch128+=2;
+        dl_ch128i+=2;
+        dl_ch_rho128+=2;
+      }
+    }           
 
+  }
+    
+  _mm_empty();
+  _m_empty();
+
+#elif defined(__arm__)
+
+#endif
+}
+
+void dlsch_dual_stream_correlation_opposite(LTE_DL_FRAME_PARMS *frame_parms,
+                                   unsigned char symbol,
+                                   unsigned short nb_rb,
+                                   int **dl_ch_estimates_ext,
+                                   int **dl_ch_estimates_ext_i,
+                                   int **dl_ch_rho_ext,
+                                   unsigned char output_shift)
+{
+
+#if defined(__x86_64__)||defined(__i386__)
+
+  unsigned short rb;
+  __m128i *dl_ch128,*dl_ch128i,*dl_ch_rho128,mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3;
+  unsigned char aarx,symbol_mod,pilots=0;
+
+  //    printf("dlsch_dual_stream_correlation: symbol %d\n",symbol);
+
+  symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
+
+  if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) {
+    pilots=1;
+  }
+
+  //  printf("Dual stream correlation (%p)\n",dl_ch_estimates_ext_i);
+
+  for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
+    //printf ("antenna %d", aarx);
+
+    dl_ch128          = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+
+    if (dl_ch_estimates_ext_i == NULL) // TM3/4
+      dl_ch128i         = (__m128i *)&dl_ch_estimates_ext[2+aarx][symbol*frame_parms->N_RB_DL*12];
+    else
+      dl_ch128i         = (__m128i *)&dl_ch_estimates_ext_i[aarx][symbol*frame_parms->N_RB_DL*12];
+
+    dl_ch_rho128      = (__m128i *)&dl_ch_rho_ext[aarx][symbol*frame_parms->N_RB_DL*12];
+
+
+    for (rb=0; rb<nb_rb; rb++) {
+      // multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch128[0],dl_ch128i[0]);
+      //      print_ints("re",&mmtmpD0);
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch128i[0],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)&conjugate[0]);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128[0]);
+      //      print_ints("im",&mmtmpD1);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+      //      print_ints("re(shift)",&mmtmpD0);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+      //      print_ints("im(shift)",&mmtmpD1);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+      //      print_ints("c0",&mmtmpD2);
+      //      print_ints("c1",&mmtmpD3);
+      dl_ch_rho128[0] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+     // print_shorts("rho 2:",dl_ch_rho128);
+      // multiply by conjugated channel
+      mmtmpD0 = _mm_madd_epi16(dl_ch128[1],dl_ch128i[1]);
+      // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+      mmtmpD1 = _mm_shufflelo_epi16(dl_ch128i[1],_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+      mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+      mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128[1]);
+      // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+      mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+      mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+      mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+      mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+      dl_ch_rho128[1] =_mm_packs_epi32(mmtmpD2,mmtmpD3);
+
+            
+      if (pilots==0) {  
+
+        // multiply by conjugated channel
+        mmtmpD0 = _mm_madd_epi16(dl_ch128[2],dl_ch128i[2]);
+        // mmtmpD0 contains real part of 4 consecutive outputs (32-bit)
+        mmtmpD1 = _mm_shufflelo_epi16(dl_ch128i[2],_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_shufflehi_epi16(mmtmpD1,_MM_SHUFFLE(2,3,0,1));
+        mmtmpD1 = _mm_sign_epi16(mmtmpD1,*(__m128i*)conjugate);
+        mmtmpD1 = _mm_madd_epi16(mmtmpD1,dl_ch128[2]);
+        // mmtmpD1 contains imag part of 4 consecutive outputs (32-bit)
+        mmtmpD0 = _mm_srai_epi32(mmtmpD0,output_shift);
+        mmtmpD1 = _mm_srai_epi32(mmtmpD1,output_shift);
+        mmtmpD2 = _mm_unpacklo_epi32(mmtmpD0,mmtmpD1);
+        mmtmpD3 = _mm_unpackhi_epi32(mmtmpD0,mmtmpD1);
+        dl_ch_rho128[2] = _mm_packs_epi32(mmtmpD2,mmtmpD3);
+	
        dl_ch128+=3;
         dl_ch128i+=3;
         dl_ch_rho128+=3;
diff --git a/openair1/PHY/LTE_TRANSPORT/proto.h b/openair1/PHY/LTE_TRANSPORT/proto.h
index 392e20f0be97120aa4443355901b4aeba30af558..66fc01098c0a9aee5ccd92d279cd7a2583fdedde 100644
--- a/openair1/PHY/LTE_TRANSPORT/proto.h
+++ b/openair1/PHY/LTE_TRANSPORT/proto.h
@@ -952,6 +952,14 @@ void dlsch_dual_stream_correlation(LTE_DL_FRAME_PARMS *frame_parms,
                                    int **dl_ch_rho_ext,
                                    unsigned char output_shift);
 
+void dlsch_dual_stream_correlation_opposite(LTE_DL_FRAME_PARMS *frame_parms,
+                                   unsigned char symbol,
+                                   unsigned short nb_rb,
+                                   int **dl_ch_estimates_ext,
+                                   int **dl_ch_estimates_ext_i,
+                                   int **dl_ch_rho_ext,
+                                   unsigned char output_shift);
+
 void dlsch_channel_compensation_TM56(int **rxdataF_ext,
                                      int **dl_ch_estimates_ext,
                                      int **dl_ch_mag,
diff --git a/openair1/SIMULATION/LTE_PHY/dlsim.c b/openair1/SIMULATION/LTE_PHY/dlsim.c
index f38e9eb3c84fc6467b050349203e3876b05f142f..a356de4420f02eae55380e1ffd8315c03429724a 100644
--- a/openair1/SIMULATION/LTE_PHY/dlsim.c
+++ b/openair1/SIMULATION/LTE_PHY/dlsim.c
@@ -790,14 +790,14 @@ int main(int argc, char **argv)
     sprintf(bler_fname,"bler_tx%d_rec%d_chan%d_nrx%d_mcs%d_mcsi%d_u%d_imod%d.csv",transmission_mode,rx_type,channel_model,n_rx,mcs1,mcs_i,rx_type,i_mod);
   else if (abstx == 1) 
     if (perfect_ce==1)
-	 sprintf(bler_fname,"bler_tx%d_r%d_ch%d_%d_nrx%d_mcs%d_mcsi%d_ab_pce_sh%d_d2_avsn2_rho.csv",transmission_mode,rx_type,channel_model,n_frames, n_rx,mcs1, mcs2,interf_unaw_shift );
+	 sprintf(bler_fname,"bler_tx%d_r%d_ch%d_%d_nrx%d_mcs%d_mcsi%d_ab_pce_sh%d_d2_rho.csv",transmission_mode,rx_type,channel_model,n_frames, n_rx,mcs1, mcs2,interf_unaw_shift );
   	else
-    	sprintf(bler_fname,"bler_tx%d_r%d_ch%d_%d_nrx%d_mcs%d_mcsi%d_ab_sh%d_d2_avsn2_rho.csv",transmission_mode,rx_type,channel_model, n_frames, n_rx,mcs1, mcs2,interf_unaw_shift );
+    	sprintf(bler_fname,"bler_tx%d_r%d_ch%d_%d_nrx%d_mcs%d_mcsi%d_ab_sh%d_d2_rho.csv",transmission_mode,rx_type,channel_model, n_frames, n_rx,mcs1, mcs2,interf_unaw_shift );
   else //abstx=0
     if (perfect_ce==1)
-	sprintf(bler_fname,"bler_tx%d_r%d_ch%d_%d_nrx%d_mcs%d_mcsi%d_pce_sh%d_d2_avsn2_rho.csv",transmission_mode,rx_type,channel_model,n_frames, n_rx,mcs1, mcs2, interf_unaw_shift);
+	sprintf(bler_fname,"bler_tx%d_r%d_ch%d_%d_nrx%d_mcs%d_mcsi%d_pce_sh%d_d2_rho.csv",transmission_mode,rx_type,channel_model,n_frames, n_rx,mcs1, mcs2, interf_unaw_shift);
    else
-        sprintf(bler_fname,"bler_tx%d_r%d_ch%d_%d_nrx%d_mcs%d_mcsi%d_sh%d_d2_avsn2_rho.csv",transmission_mode,rx_type,channel_model,n_frames,n_rx,mcs1, mcs2, interf_unaw_shift);
+        sprintf(bler_fname,"bler_tx%d_r%d_ch%d_%d_nrx%d_mcs%d_mcsi%d_sh%d_d2_rho.csv",transmission_mode,rx_type,channel_model,n_frames,n_rx,mcs1, mcs2, interf_unaw_shift);
   
   bler_fd = fopen(bler_fname,"w");
   if (bler_fd==NULL) {
@@ -833,9 +833,9 @@ int main(int argc, char **argv)
     
     else 
       if (perfect_ce==1)
-		sprintf(csv_fname,"dout_tx%d_r%d_mcs%d_mcsi%d_ch%d_ns%d_R%d_ab_fix_pce_sh%d_d2_%d_avsn2_rho.m",transmission_mode,rx_type,mcs1,mcs2,channel_model,n_frames,num_rounds, interf_unaw_shift, n_ch_rlz);
+		sprintf(csv_fname,"dout_tx%d_r%d_mcs%d_mcsi%d_ch%d_ns%d_R%d_ab_fix_pce_sh%d_d2_%d_rho.m",transmission_mode,rx_type,mcs1,mcs2,channel_model,n_frames,num_rounds, interf_unaw_shift, n_ch_rlz);
  	else
-		sprintf(csv_fname,"dout_tx%d_r%d_mcs%d_mcsi%d_ch%d_ns%d_R%d_ab_fix_sh%d_d2_%d_avsn2_rho.m",transmission_mode,rx_type,mcs1,mcs2,channel_model,n_frames,num_rounds, interf_unaw_shift, n_ch_rlz);
+		sprintf(csv_fname,"dout_tx%d_r%d_mcs%d_mcsi%d_ch%d_ns%d_R%d_ab_fix_sh%d_d2_%d_rho.m",transmission_mode,rx_type,mcs1,mcs2,channel_model,n_frames,num_rounds, interf_unaw_shift, n_ch_rlz);
 
     csv_fd = fopen(csv_fname,"w");
     fprintf(csv_fd,"data_all%d=[",mcs1);