diff --git a/openair1/PHY/defs.h b/openair1/PHY/defs.h
index 8a6c827ff80b5b9c3cda69dc0f977e331d603a39..5fdd88cd2398b788466026551aa19a697a1c5cc1 100755
--- a/openair1/PHY/defs.h
+++ b/openair1/PHY/defs.h
@@ -246,6 +246,8 @@ typedef struct eNB_proc_t_s {
   int frame_rx;
   /// frame to act upon for PRACH
   int frame_prach;
+  /// \internal This variable is protected by \ref mutex_fep.
+  int instance_cnt_fep;
   /// \brief Instance count for FH processing thread.
   /// \internal This variable is protected by \ref mutex_FH.
   int instance_cnt_FH;
@@ -264,6 +266,8 @@ typedef struct eNB_proc_t_s {
   int first_rx;
   /// flag to indicate first TX transmission
   int first_tx;
+  /// pthread attributes for parallel fep thread
+  pthread_attr_t attr_fep;
   /// pthread attributes for FH processing thread
   pthread_attr_t attr_FH;
   /// pthread attributes for single eNB processing thread
@@ -272,6 +276,8 @@ typedef struct eNB_proc_t_s {
   pthread_attr_t attr_prach;
   /// pthread attributes for asynchronous RX thread
   pthread_attr_t attr_asynch_rxtx;
+  /// scheduling parameters for parallel fep thread
+  struct sched_param sched_param_fep;
   /// scheduling parameters for FH thread
   struct sched_param sched_param_FH;
   /// scheduling parameters for single eNB thread
@@ -280,14 +286,20 @@ typedef struct eNB_proc_t_s {
   struct sched_param sched_param_prach;
   /// scheduling parameters for asynch_rxtx thread
   struct sched_param sched_param_asynch_rxtx;
+  /// pthread structure for parallel fep thread
+  pthread_t pthread_fep;
   /// pthread structure for PRACH thread
   pthread_t pthread_prach;
+  /// condition variable for parallel fep thread
+  pthread_cond_t cond_fep;
   /// condition variable for FH thread
   pthread_cond_t cond_FH;
   /// condition variable for PRACH processing thread;
   pthread_cond_t cond_prach;
   /// condition variable for asynch RX/TX thread
   pthread_cond_t cond_asynch_rxtx;
+  /// mutex for parallel fep thread
+  pthread_mutex_t mutex_fep;
   /// mutex for FH
   pthread_mutex_t mutex_FH;
   /// mutex for PRACH thread
diff --git a/openair1/SCHED/phy_procedures_lte_eNb.c b/openair1/SCHED/phy_procedures_lte_eNb.c
index 6053a151a432c07ba03c654a509f68e0e6cb17f2..be7aedca658513ddc48d4a785cf77bd783bef70f 100755
--- a/openair1/SCHED/phy_procedures_lte_eNb.c
+++ b/openair1/SCHED/phy_procedures_lte_eNb.c
@@ -2497,6 +2497,158 @@ void cba_procedures(PHY_VARS_eNB *eNB,eNB_rxtx_proc_t *proc,int UE_id,int harq_p
 
 }
 
+typedef struct {        // NOTE(review): fep_task is not referenced elsewhere in this patch
+  PHY_VARS_eNB *eNB;    ///< same type as the first argument of fep0()
+  int slot;             ///< presumably the slot argument of fep0() -- TODO confirm
+} fep_task;
+
+/// Front-end processing of one uplink slot of subframe proc->subframe_rx:
+/// calls remove_7_5_kHz() once, then slot_fep_ul() for each symbol of the slot.
+/// Only bit 0 of \a slot is used (first or second slot of the subframe).
+void fep0(PHY_VARS_eNB *eNB,int slot) {
+
+  LTE_DL_FRAME_PARMS *fp = &eNB->frame_parms;
+  // slot number passed to the helpers: 2*subframe_rx + (slot&1)
+  const int slot_idx     = (slot&1)+(eNB->proc.subframe_rx<<1);
+  const int nsymb        = fp->symbols_per_tti/2;
+  int symbol;
+
+  remove_7_5_kHz(eNB,slot_idx);
+
+  for (symbol=0; symbol<nsymb; symbol++) {
+    slot_fep_ul(fp,&eNB->common_vars,symbol,slot_idx,0,0);
+  }
+}
+
+/// Decrement *instance_cnt while holding \a mutex.
+/// \param name thread name used in the error messages
+/// \return 0 on success, -1 after exit_fun() if the mutex could not be locked/unlocked
+static inline int release_thread(pthread_mutex_t *mutex,int *instance_cnt,char *name) {
+  if (pthread_mutex_lock(mutex)) {
+    LOG_E( PHY, "[SCHED][eNB] error locking mutex for %s\n",name);
+    exit_fun("nothing to add");
+    return -1;
+  }
+  --(*instance_cnt);
+  if (pthread_mutex_unlock(mutex)) {
+    LOG_E( PHY, "[SCHED][eNB] error unlocking mutex for %s\n",name);
+    exit_fun("nothing to add");
+    return -1;
+  }
+  return 0;
+}
+
+/// Block the caller until *instance_cnt is non-negative.
+/// \a mutex protects the counter; \a cond is waited on while it is negative.
+/// \param name thread name used in the error messages
+/// \return 0 once the counter is >= 0, -1 after exit_fun() on a mutex error
+static inline int wait_on_condition(pthread_mutex_t *mutex,pthread_cond_t *cond,int *instance_cnt,char *name) {
+  if (pthread_mutex_lock(mutex)) {
+    LOG_E( PHY, "[SCHED][eNB] error locking mutex for %s\n",name);
+    exit_fun("nothing to add");
+    return -1;
+  }
+  // pthread_cond_wait() releases the mutex while blocked and holds it again on
+  // return; the predicate is re-tested after every wake-up
+  while (*instance_cnt < 0)
+    pthread_cond_wait(cond,mutex);
+  if (pthread_mutex_unlock(mutex)) {
+    LOG_E(PHY,"[SCHED][eNB] error unlocking mutex for %s\n",name);
+    exit_fun("nothing to add");
+    return -1;
+  }
+  return 0;
+}
+
+extern int oai_exit;
+
+
+/// Parallel fep worker: waits for instance_cnt_fep to be raised, runs fep0() on slot 0, releases it.
+static void *fep_thread(void *param) {
+  PHY_VARS_eNB *eNB = (PHY_VARS_eNB *)param;
+  eNB_proc_t *proc  = &eNB->proc;
+  while (oai_exit == 0) {
+    printf("Waiting for parallel FEP signal\n");
+    if (wait_on_condition(&proc->mutex_fep,&proc->cond_fep,&proc->instance_cnt_fep,"fep thread") < 0) break;
+    printf("Running parallel FEP on first slot\n");
+    fep0(eNB,0);
+    if (release_thread(&proc->mutex_fep,&proc->instance_cnt_fep,"fep thread") < 0) break;
+  }
+  return NULL;
+}
+
+/// Set up mutex/cond/counter for the parallel fep thread and start it (attr_fep may be NULL).
+void init_fep_thread(PHY_VARS_eNB *eNB,pthread_attr_t *attr_fep) {
+
+  eNB_proc_t *proc = &eNB->proc;
+
+  proc->instance_cnt_fep = -1; // idle until eNB_fep_full_2thread() raises it to 0
+  // a failed init/create must not go unnoticed: callers would signal a thread that never started
+  if ((pthread_mutex_init( &proc->mutex_fep, NULL) != 0) ||
+      (pthread_cond_init( &proc->cond_fep, NULL) != 0) ||
+      (pthread_create(&proc->pthread_fep, attr_fep, fep_thread, (void*)eNB) != 0))
+    exit_fun("error initializing parallel fep thread");
+}
+
+/// Full UL front-end processing split over two threads: slot 0 is handed to
+/// the parallel fep thread (instance_cnt_fep/cond_fep) while slot 1 is
+/// processed by the caller. exit_fun() is called on any synchronization error.
+void eNB_fep_full_2thread(PHY_VARS_eNB *eNB) {
+
+  eNB_proc_t *proc = &eNB->proc;
+  struct timespec wait;
+  int wait_cnt=0,busy=1;
+
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_SLOT_FEP,1);
+  start_meas(&eNB->ofdm_demod_stats);
+
+  printf("Running 2 thread FEP\n");
+
+  // pthread_mutex_timedlock() takes an absolute CLOCK_REALTIME deadline: now + 5 ms
+  clock_gettime(CLOCK_REALTIME,&wait);
+  wait.tv_nsec += 5000000L;
+  if (wait.tv_nsec >= 1000000000L) { wait.tv_sec++; wait.tv_nsec -= 1000000000L; }
+  if (pthread_mutex_timedlock(&proc->mutex_fep,&wait) != 0) {
+    printf("[eNB] ERROR pthread_mutex_lock for fep thread (IC %d)\n", proc->instance_cnt_fep);
+    exit_fun( "error locking mutex_fep" );
+    return;
+  }
+
+  if (proc->instance_cnt_fep==0) {
+    printf("[eNB] FEP thread busy\n");
+    exit_fun("FEP thread busy");
+    pthread_mutex_unlock( &proc->mutex_fep );
+    return;
+  }
+
+  ++proc->instance_cnt_fep;
+
+  printf("[eNB] waking up FEP thread\n");
+  if (pthread_cond_signal(&proc->cond_fep) != 0) {
+    printf("[eNB] ERROR pthread_cond_signal for fep thread\n");
+    exit_fun( "ERROR pthread_cond_signal" );
+    pthread_mutex_unlock( &proc->mutex_fep ); // the error path must not keep the mutex
+    return;
+  }
+  pthread_mutex_unlock( &proc->mutex_fep );
+
+  printf("Calling FEP for 2nd slot\n");
+  fep0(eNB,1); // runs concurrently with slot 0 in the fep thread
+
+  // Poll until the fep thread has released slot 0. The mutex is dropped between
+  // polls: holding it while spinning would block release_thread() forever.
+  while (busy && (wait_cnt<=10000)) {
+    if (pthread_mutex_lock( &proc->mutex_fep ) != 0) break; // reported below
+    busy = (proc->instance_cnt_fep==0);
+    pthread_mutex_unlock( &proc->mutex_fep );
+    if (busy) { wait_cnt++; sched_yield(); }
+  }
+  if (busy) {
+    printf("[eNB] parallel FEP didn't finish\n");
+    exit_fun( "error" );
+  }
+}
+
 void eNB_fep_full(PHY_VARS_eNB *eNB) {
 
   eNB_proc_t *proc = &eNB->proc;
diff --git a/openair1/SIMULATION/LTE_PHY/ulsim.c b/openair1/SIMULATION/LTE_PHY/ulsim.c
index 47565f4e0c3e9010ab34bbfe28375c19c80974e7..70510cbc042d6171125257d5bd1bf42db0c9804d 100644
--- a/openair1/SIMULATION/LTE_PHY/ulsim.c
+++ b/openair1/SIMULATION/LTE_PHY/ulsim.c
@@ -85,6 +85,8 @@ double t_rx_min = 1000000000; /*!< \brief initial min process time for tx */
 int n_tx_dropped = 0; /*!< \brief initial max process time for tx */
 int n_rx_dropped = 0; /*!< \brief initial max process time for rx */
 
+int oai_exit = 0;
+
 
 void fill_ulsch_dci(PHY_VARS_eNB *eNB,void *UL_dci,int first_rb,int nb_rb,int mcs,int ndi,int cqi_flag) {
 
@@ -171,6 +173,7 @@ void fill_ulsch_dci(PHY_VARS_eNB *eNB,void *UL_dci,int first_rb,int nb_rb,int mc
 }
 
 extern void eNB_fep_full(PHY_VARS_eNB *eNB);
+extern void eNB_fep_full_2thread(PHY_VARS_eNB *eNB);
 
 int main(int argc, char **argv)
 {
@@ -688,6 +691,8 @@ int main(int argc, char **argv)
   eNB->ulsch[0] = new_eNB_ulsch(max_turbo_iterations,N_RB_DL,0);
   UE->ulsch[0]   = new_ue_ulsch(N_RB_DL,0);
 
+  init_fep_thread(eNB,&eNB->proc.attr_fep);
+
   // Create transport channel structures for 2 transport blocks (MIMO)
   for (i=0; i<2; i++) {
     eNB->dlsch[0][i] = new_eNB_dlsch(1,8,1827072,N_RB_DL,0);
@@ -1181,7 +1186,7 @@ int main(int argc, char **argv)
           }
 
 
-	  eNB->fep = eNB_fep_full;
+	  eNB->fep = eNB_fep_full_2thread;
 	  eNB->do_prach = NULL;
 
 	  phy_procedures_eNB_common_RX(eNB);
@@ -1700,6 +1705,9 @@ int main(int argc, char **argv)
   }//ch realization
 
 
+  oai_exit=1;
+  pthread_cond_signal(&eNB->proc.cond_fep);
+
   if (abstx) { // ABSTRACTION
     fprintf(csv_fdUL,"];");
     fclose(csv_fdUL);
diff --git a/targets/RT/USER/lte-enb.c b/targets/RT/USER/lte-enb.c
index b614ca131d31b2397a8bae1fdab18b5685f96284..d4f83d5a2c57ff8a3e8bbb21eb8d4c1701be4a3b 100644
--- a/targets/RT/USER/lte-enb.c
+++ b/targets/RT/USER/lte-enb.c
@@ -1329,6 +1329,7 @@ static void* eNB_thread_single( void* param ) {
 
 }
 
+extern void init_fep_thread(PHY_VARS_eNB *, pthread_attr_t *);
 
 void init_eNB_proc(int inst) {
   
@@ -1337,7 +1338,7 @@ void init_eNB_proc(int inst) {
   PHY_VARS_eNB *eNB;
   eNB_proc_t *proc;
   eNB_rxtx_proc_t *proc_rxtx;
-  pthread_attr_t *attr0=NULL,*attr1=NULL,*attr_FH=NULL,*attr_prach=NULL,*attr_asynch=NULL,*attr_single=NULL;
+  pthread_attr_t *attr0=NULL,*attr1=NULL,*attr_FH=NULL,*attr_prach=NULL,*attr_asynch=NULL,*attr_single=NULL,*attr_fep=NULL;
 
   for (CC_id=0; CC_id<MAX_NUM_CCs; CC_id++) {
     eNB = PHY_vars_eNB_g[inst][CC_id];
@@ -1347,10 +1348,10 @@ void init_eNB_proc(int inst) {
     proc_rxtx = proc->proc_rxtx;
     proc_rxtx[0].instance_cnt_rxtx = -1;
     proc_rxtx[1].instance_cnt_rxtx = -1;
-    proc->instance_cnt_prach = -1;
-    proc->instance_cnt_FH = -1;
+    proc->instance_cnt_prach       = -1;
+    proc->instance_cnt_FH          = -1;
     proc->instance_cnt_asynch_rxtx = -1;
-    proc->CC_id = CC_id;
+    proc->CC_id = CC_id;    
     
     proc->first_rx=1;
     proc->first_tx=1;
@@ -1373,6 +1374,7 @@ void init_eNB_proc(int inst) {
     attr_prach  = &proc->attr_prach;
     attr_asynch = &proc->attr_asynch_rxtx;
     attr_single = &proc->attr_single;
+    attr_fep    = &proc->attr_fep;
 #endif
 
     if (eNB->single_thread_flag==0) {
@@ -1380,9 +1382,10 @@ void init_eNB_proc(int inst) {
       pthread_create( &proc_rxtx[1].pthread_rxtx, attr1, eNB_thread_rxtx, &proc_rxtx[1] );
       pthread_create( &proc->pthread_FH, attr_FH, eNB_thread_FH, &eNB->proc );
     }
-    else 
+    else {
       pthread_create(&proc->pthread_single, attr_single, eNB_thread_single, &eNB->proc);
-
+      init_fep_thread(eNB,attr_fep);
+    }
     pthread_create( &proc->pthread_prach, attr_prach, eNB_thread_prach, &eNB->proc );
     if ((eNB->node_timing == synch_to_other) ||
 	(eNB->node_function == NGFI_RRU_IF5) ||
@@ -1631,7 +1634,7 @@ void init_eNB(eNB_func_t node_function[], eNB_timing_t node_timing[],int nb_inst
 	break;
       case NGFI_RRU_IF4p5:
 	eNB->do_prach             = do_prach;
-	eNB->fep                  = eNB_fep_full;
+	eNB->fep                  = (eNB->single_thread_flag == 0) ? eNB_fep_full : eNB_fep_full_2thread;
 	eNB->proc_uespec_rx       = NULL;
 	eNB->proc_tx              = NULL;//proc_tx_rru_if4p5;
 	eNB->tx_fh                = NULL;
@@ -1658,7 +1661,7 @@ void init_eNB(eNB_func_t node_function[], eNB_timing_t node_timing[],int nb_inst
 	break;
       case eNodeB_3GPP:
 	eNB->do_prach             = do_prach;
-	eNB->fep                  = eNB_fep_full;
+	eNB->fep                  = (eNB->single_thread_flag == 0) ? eNB_fep_full : eNB_fep_full_2thread;
 	eNB->proc_uespec_rx       = phy_procedures_eNB_uespec_RX;
 	eNB->proc_tx              = proc_tx_full;
 	eNB->tx_fh                = NULL;
@@ -1676,7 +1679,7 @@ void init_eNB(eNB_func_t node_function[], eNB_timing_t node_timing[],int nb_inst
 	break;
       case eNodeB_3GPP_BBU:
 	eNB->do_prach       = do_prach;
-	eNB->fep            = eNB_fep_full;
+	eNB->fep            = (eNB->single_thread_flag == 0) ? eNB_fep_full : eNB_fep_full_2thread;
 	eNB->proc_uespec_rx = phy_procedures_eNB_uespec_RX;
 	eNB->proc_tx        = proc_tx_full;
 	eNB->tx_fh          = tx_fh_if5;