From 973a0795b41b48d66bb14191fb7415d5e1a82ce7 Mon Sep 17 00:00:00 2001
From: Hongzhi Wang <hongzhi.wang@tcl.com>
Date: Mon, 17 Jun 2019 16:51:50 +0200
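Subject: [PATCH] update ue dlsch decoding multi-threading

Rework the UE DLSCH multi-threaded decoding path
(UE_DLSCH_PARALLELISATION):

- init_UE() additionally creates a "DLthread" running dlsch_thread.
- nr_dlsch_decoding_mthread() no longer wakes workers through the
  mutex_dlsch_td/cond_dlsch_td (and *_td1) pairs; it pushes one job per
  code segment 1..C-1 to the thread pool with pushTpool() and keeps
  decoding segment 0 itself.
- nr_dlsch_decoding_2thread0() becomes nr_dlsch_decoding_process(),
  which takes a nr_rxtx_thread_data_t and reads the segment index from
  proc->num_seg instead of hard-coding segment 1.
- The LDPC base graph, rate and kc are chosen from the code rate A/G
  instead of K/Z, and numMaxIter follows dlsch->max_ldpc_iterations.
- check_crc() is called with a per-segment length (length_dec).
- LLRs are loaded directly from harq_process->d[r]; the inv_d copy in
  the main decoding path is commented out.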
---
 executables/nr-ue.c                           |    6 +
 .../PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c   | 1020 ++++-------------
 .../NR_UE_TRANSPORT/nr_transport_proto_ue.h   |    2 +-
 openair1/PHY/defs_common.h                    |    1 +
 openair1/PHY/defs_nr_UE.h                     |    6 +-
 openair1/PHY/thread_NR_UE.h                   |    2 +-
 openair1/SCHED_NR_UE/defs.h                   |    2 +-
 7 files changed, 245 insertions(+), 794 deletions(-)

diff --git a/executables/nr-ue.c b/executables/nr-ue.c
index d6946f1ac78..7b2de4288ee 100644
--- a/executables/nr-ue.c
+++ b/executables/nr-ue.c
@@ -742,6 +742,7 @@ void init_UE(int nb_inst) {
   int inst;
   NR_UE_MAC_INST_t *mac_inst;
   pthread_t threads[nb_inst];
+  pthread_t dlsch0_threads;
 
   for (inst=0; inst < nb_inst; inst++) {
     PHY_VARS_NR_UE *UE = PHY_vars_UE_g[inst][0];
@@ -763,6 +764,11 @@ void init_UE(int nb_inst) {
     mac_inst->initial_bwp_ul.cyclic_prefix = UE->frame_parms.Ncp;
     LOG_I(PHY,"Intializing UE Threads for instance %d (%p,%p)...\n",inst,PHY_vars_UE_g[inst],PHY_vars_UE_g[inst][0]);
     threadCreate(&threads[inst], UE_thread, (void *)UE, "UEthread", -1, OAI_PRIORITY_RT_MAX);
+
+#ifdef UE_DLSCH_PARALLELISATION
+    threadCreate(&dlsch0_threads, dlsch_thread, (void *)UE, "DLthread", -1, OAI_PRIORITY_RT_MAX-1);
+#endif
+
   }
 
   printf("UE threads created by %ld\n", gettid());
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
index 73c5c3ef6f7..51adcf6087f 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
@@ -49,6 +49,9 @@
 static uint64_t nb_total_decod =0;
 static uint64_t nb_error_decod =0;
 
+notifiedFIFO_t freeBlocks;
+notifiedFIFO_elt_t *msgToPush;
+
 //extern double cpuf;
 
 void free_nr_ue_dlsch(NR_UE_DLSCH_t *dlsch)
@@ -686,11 +689,10 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
   uint32_t G;
   uint32_t ret,offset;
   //short dummy_w[MAX_NUM_DLSCH_SEGMENTS][3*(8448+64)];
-  uint32_t r,r_offset=0,Kr,Kr_bytes,err_flag=0,K_bytes_F;
+  uint32_t r,r_offset=0,Kr=8424,Kr_bytes,err_flag=0,K_bytes_F;
   uint8_t crc_type;
   //UE_rxtx_proc_t *proc = &phy_vars_ue->proc;
-  int32_t no_iteration_ldpc;
-  int Cby2;
+  int32_t no_iteration_ldpc,length_dec;
   /*uint8_t C;
   uint8_t Qm;
   uint8_t Nl;
@@ -711,16 +713,19 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
   uint8_t kb, kc;
   uint8_t Ilbrm = 0;
   uint32_t Tbslbrm = 950984;
-  uint16_t nb_rb = 30; //to update
-  //uint16_t nb_symb_sch = 12;
-  uint8_t nb_re_dmrs = 6;
-  uint16_t length_dmrs = 1;
+  uint16_t nb_rb = 30;
+  double Coderate = 0.0;
+  nfapi_nr_config_request_t *cfg = &phy_vars_ue->nrUE_config;
+  uint8_t dmrs_type = cfg->pdsch_config.dmrs_type.value;
+  uint8_t nb_re_dmrs = (dmrs_type==NFAPI_NR_DMRS_TYPE1)?6:4;
+  uint16_t length_dmrs = 1; //cfg->pdsch_config.dmrs_max_length.value;
 
   uint32_t i,j;
-  //uint32_t k;
 
-    __m128i *pv = (__m128i*)&z;
-    __m128i *pl = (__m128i*)&l;
+  __m128i *pv = (__m128i*)&z;
+  __m128i *pl = (__m128i*)&l;
+  notifiedFIFO_t nf;
+  initNotifiedFIFO(&nf);
 
 
   if (!dlsch_llr) {
@@ -766,7 +771,7 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
 
   harq_process->TBS = nr_compute_tbs(harq_process->mcs,nb_rb,nb_symb_sch,nb_re_dmrs,length_dmrs, harq_process->Nl);
 
-  A = harq_process->TBS; //2072 for QPSK 1/3
+  A = harq_process->TBS;
 
   ret = dlsch->max_ldpc_iterations;
 
@@ -774,6 +779,9 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
 
   G = harq_process->G;
 
+  LOG_I(PHY,"DLSCH Decoding main, harq_pid %d TBS %d G %d mcs %d Nl %d nb_symb_sch %d nb_rb %d\n",harq_pid,A,G, harq_process->mcs, harq_process->Nl, nb_symb_sch,nb_rb);
+
+
   proc->decoder_main_available = 1;
   proc->decoder_thread_available = 0;
   proc->decoder_thread_available1 = 0;
@@ -796,28 +804,43 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
 
     }
 
-  kb = harq_process->K/harq_process->Z;
-      if ( kb==22){
-        p_decParams->BG = 1;
-        p_decParams->R = 13;
-        kc = 68;
-      }
-      else{
-        p_decParams->BG = 2;
-        p_decParams->R = 13;
-        kc = 52;
-      }
+  Coderate = (float) A /(float) G;
+  if ((A <=292) || ((A<=3824) && (Coderate <= 0.6667)) || Coderate <= 0.25)
+  {
+    p_decParams->BG = 2;
+    if (Coderate < 0.3333){
+      p_decParams->R = 15;
+      kc = 52;
+    }
+    else if (Coderate <0.6667){
+      p_decParams->R = 13;
+      kc = 32;
+    }
+    else {
+      p_decParams->R = 23;
+      kc = 17;
+    }
+  }
+  else{
+    p_decParams->BG = 1;
+    if (Coderate < 0.6667){
+      p_decParams->R = 13;
+      kc = 68;
+    }
+    else if (Coderate <0.8889){
+      p_decParams->R = 23;
+      kc = 35;
+    }
+    else {
+      p_decParams->R = 89;
+      kc = 27;
+    }
+  }
 
-      p_decParams->numMaxIter = 2;
-      Kr = p_decParams->Z*kb;
-      p_decParams->outMode= 0;
+  //printf("coderate %f kc %d \n", Coderate, kc);
+  p_decParams->numMaxIter = dlsch->max_ldpc_iterations;
+  p_decParams->outMode= 0;
 
-  /*
-  else {
-    printf("dlsch_decoding.c: Ndi>0 not checked yet!!\n");
-    return(max_ldpc_iterations);
-  }
-  */
   err_flag = 0;
   r_offset = 0;
 
@@ -841,13 +864,39 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
   printf("Segmentation: C %d, K %d\n",harq_process->C,harq_process->K);
 #endif
 
+  notifiedFIFO_elt_t *res;
   opp_enabled=1;
-  if (harq_process->C>1) { // wakeup worker if more than 1 segment
-    if (pthread_mutex_lock(&proc->mutex_dlsch_td) != 0) {
-      LOG_E( PHY, "[SCHED][UE %d][Slot0] error locking mutex for UE dlsch td\n",phy_vars_ue->Mod_id );
-      exit_fun("nothing to add");
-    }
-
+  if (harq_process->C>1) {
+	for (int nb_seg =1 ; nb_seg<harq_process->C; nb_seg++){
+	  printf("mthread pool C >1\n");
+	  displayList(&Tpool->incomingFifo);
+	  if ( (res=tryPullTpool(&nf, Tpool)) != NULL ) {
+		  printf("mthread pool non null\n");
+	          pushNotifiedFIFO_nothreadSafe(&freeBlocks,res);
+	        }
+
+	  printf("mthread after push\n");
+	  displayList(&freeBlocks);
+	  AssertFatal((msgToPush=pullNotifiedFIFO_nothreadSafe(&freeBlocks)) != NULL,"chained list failure");
+	  nr_rxtx_thread_data_t *curMsg=(nr_rxtx_thread_data_t *)NotifiedFifoData(msgToPush);
+	  curMsg->UE=phy_vars_ue;
+
+	  memset(&curMsg->proc, 0, sizeof(curMsg->proc));
+	  printf("mthread frame %d slot %d\n", proc->frame_rx, proc->nr_tti_rx);
+	  curMsg->proc.frame_rx  = proc->frame_rx;
+	  curMsg->proc.nr_tti_rx = proc->nr_tti_rx;
+	  curMsg->proc.num_seg   = nb_seg;
+
+	  curMsg->proc.eNB_id= eNB_id;
+	  curMsg->proc.harq_pid=harq_pid;
+	  curMsg->proc.llr8_flag = llr8_flag;
+
+	  printf("mthread after pull");
+
+	  msgToPush->key=nb_seg;
+	  pushTpool(Tpool, msgToPush);
+	  printf("mthread after pushTpool\n");
+	  displayList(&Tpool->incomingFifo);
   /*Qm= harq_process->Qm;
     Nl=harq_process->Nl;
     r_thread = harq_process->C/2-1;
@@ -857,7 +906,6 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
     GpmodC = Gp%C;
 
 
-
     if (r_thread < (C-(GpmodC)))
       Er = Nl*Qm * (Gp/C);
     else
@@ -865,108 +913,10 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
     printf("mthread Er %d\n", Er);
 
     printf("mthread instance_cnt_dlsch_td %d\n",  proc->instance_cnt_dlsch_td);*/
-
-    proc->instance_cnt_dlsch_td++;
-    proc->eNB_id    = eNB_id;
-    proc->harq_pid  = harq_pid;
-    proc->llr8_flag = llr8_flag;
-  //proc->r[0] = 1;
-
-    if (proc->instance_cnt_dlsch_td == 0)
-    {
-      LOG_D(PHY,"unblock dlsch td processing thread blocked on instance_cnt_dlsch_td : %d \n", proc->instance_cnt_dlsch_td );
-      if (pthread_cond_signal(&proc->cond_dlsch_td) != 0) {
-        LOG_E( PHY, "[SCHED][UE %d][Slot0] ERROR pthread_cond_signal for UE dlsch td\n", phy_vars_ue->Mod_id);
-        exit_fun("nothing to add");
-      }
-      if (pthread_mutex_unlock(&proc->mutex_dlsch_td) != 0) {
-        LOG_E( PHY, "[SCHED][UE %d][Slot0] error unlocking mutex for UE dlsch td \n",phy_vars_ue->Mod_id );
-        exit_fun("nothing to add");
-      }
-    } else
-    {
-      LOG_E( PHY, "[SCHED][UE %d] UE dlsch td thread busy (IC %d)!!\n", phy_vars_ue->Mod_id, proc->instance_cnt_dlsch_td);
-      if (proc->instance_cnt_dlsch_td > 4)
-        exit_fun("instance_cnt_dlsch_td > 4");
-    }
-  //AssertFatal(pthread_cond_signal(&proc->cond_slot1_dl_processing) ==0 ,"");
-    AssertFatal(pthread_mutex_unlock(&proc->mutex_dlsch_td) ==0,"");
-
-    if (harq_process->C>2) {
-      if (pthread_mutex_lock(&proc->mutex_dlsch_td1) != 0) {
-        LOG_E( PHY, "[SCHED][UE %d][Slot0] error locking mutex for UE dlsch td\n",phy_vars_ue->Mod_id );
-        exit_fun("nothing to add");
-      }
-
-      proc->instance_cnt_dlsch_td1++;
-      proc->eNB_id    = eNB_id;
-      proc->harq_pid  = harq_pid;
-      proc->llr8_flag = llr8_flag;
-   // proc->Er = Er;
-
-      if (proc->instance_cnt_dlsch_td1 == 0)
-      {
-        LOG_D(PHY,"unblock slot1 dl processing thread blocked on instance_cnt_dlsch_td : %d \n", proc->instance_cnt_dlsch_td1 );
-        if (pthread_cond_signal(&proc->cond_dlsch_td1) != 0) {
-          LOG_E( PHY, "[SCHED][UE %d][Slot0] ERROR pthread_cond_signal for UE dlsch td\n", phy_vars_ue->Mod_id);
-          exit_fun("nothing to add");
-        }
-        if (pthread_mutex_unlock(&proc->mutex_dlsch_td1) != 0) {
-          LOG_E( PHY, "[SCHED][UE %d][Slot0] error unlocking mutex for UE dlsch td \n",phy_vars_ue->Mod_id );
-          exit_fun("nothing to add");
-        }
-      } else
-      {
-        LOG_E( PHY, "[SCHED][UE %d] UE dlsch td thread 1 busy (IC %d)!!\n", phy_vars_ue->Mod_id, proc->instance_cnt_dlsch_td1);
-        if (proc->instance_cnt_dlsch_td1 > 4)
-          exit_fun("instance_cnt_dlsch_td1 > 4");
-      }
-
-      AssertFatal(pthread_mutex_unlock(&proc->mutex_dlsch_td1) ==0,"");
-
-    }
-  /*
-    if (pthread_mutex_timedlock(&proc->mutex_td,&wait) != 0) {
-      printf("[eNB] ERROR pthread_mutex_lock for TD thread (IC %d)\n", proc->instance_cnt_td);
-      exit_fun( "error locking mutex_fep" );
-      return -1;
-    }
-
-    if (proc->instance_cnt_td==0) {
-      printf("[UE] TD thread busy\n");
-      exit_fun("TD thread busy");
-      pthread_mutex_unlock( &proc->mutex_td );
-      return -1;
-    }
-
-    ++proc->instance_cnt_td;
-
-    proc->tdp.UE        = phy_vars_ue;
-    proc->tdp.eNB_id    = eNB_id;
-    proc->tdp.harq_pid  = harq_pid;
-    proc->tdp.llr8_flag = llr8_flag;
-
-    printf("----- 2thread llr flag %d tdp flag %d\n",llr8_flag, proc->tdp.llr8_flag);
-
-
-    // wakeup worker to do second half segments
-    if (pthread_cond_signal(&proc->cond_td) != 0) {
-      printf("[UE] ERROR pthread_cond_signal for td thread exit\n");
-      exit_fun( "ERROR pthread_cond_signal" );
-      return (1+dlsch->last_iteration_cnt);
-    }
-
-    pthread_mutex_unlock( &proc->mutex_td );*/
-
-
-    Cby2 = 1; //harq_process->C/2;
+	  }
   //proc->decoder_main_available = 1;
   }
-  else {
-    Cby2 = 1;
-  }
 
-//for (r=0; r<Cby2; r++) {
     r = 0;  
     if (r==0) r_offset =0;
 
@@ -1067,45 +1017,46 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
     memset(harq_process->c[r],0,Kr_bytes);
 
     //    printf("done\n");
-    if (harq_process->C == 1)
+    if (harq_process->C == 1){
       crc_type = CRC24_A;
-    else
+      length_dec = harq_process->B;
+    }
+    else{
       crc_type = CRC24_B;
+      length_dec = (harq_process->B+24*harq_process->C)/harq_process->C;
+    }
 
     //#ifndef __AVX2__
 
     if (err_flag == 0) {
 /*
-        LOG_I(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
+        LOG_I(PHY, "LDPC algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
                             Kr,r,harq_process->C,harq_process->nb_rb,crc_type,A,harq_process->TBS,
                             harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_ldpc_iterations);
 */
-      if (llr8_flag) {
-        AssertFatal (Kr >= 256, "turbo algo issue Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d\n",
-            Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS,harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round);
-      }
+
 #if UE_TIMING_TRACE
       start_meas(dlsch_turbo_decoding_stats);
 #endif
-      LOG_D(PHY,"mthread AbsSubframe %d.%d Start turbo segment %d/%d \n",frame%1024,nr_tti_rx,r,harq_process->C-1);
+      LOG_D(PHY,"mthread AbsSubframe %d.%d Start LDPC segment %d/%d \n",frame%1024,nr_tti_rx,r,harq_process->C-1);
 
-      for (int cnt =0; cnt < (kc-2)*p_decParams->Z; cnt++){
+      /*for (int cnt =0; cnt < (kc-2)*p_decParams->Z; cnt++){
         inv_d[cnt] = (1)*harq_process->d[r][cnt];
-      }
+      }*/
 
       memset(pv,0,2*p_decParams->Z*sizeof(int16_t));
       //memset(pl,0,2*p_decParams->Z*sizeof(int8_t));
       memset((pv+K_bytes_F),127,harq_process->F*sizeof(int16_t));
 
 
-      for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F+((2*p_decParams->Z)>>3); i++, j++)
+      for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F; i++, j++)
       {
-        pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j]));
+        pv[i]= _mm_loadu_si128((__m128i*)(&harq_process->d[r][8*j]));
       }
 
-      for (i=Kr_bytes+((2*p_decParams->Z)>>3),j=Kr_bytes; i < ((kc*p_decParams->Z)>>3); i++, j++)
+      for (i=Kr_bytes,j=K_bytes_F-((2*p_decParams->Z)>>3); i < ((kc*p_decParams->Z)>>3); i++, j++)
       {
-        pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j]));
+        pv[i]= _mm_loadu_si128((__m128i*)(&harq_process->d[r][8*j]));
       }
 
       for (i=0, j=0; j < ((kc*p_decParams->Z)>>4);  i+=2, j++)
@@ -1128,8 +1079,8 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
         ret=2;
       }
 
-      if (check_crc(llrProcBuf,harq_process->B,harq_process->F,crc_type)) {
-        printf("CRC OK\n");
+      if (check_crc((uint8_t*)llrProcBuf,length_dec,harq_process->F,crc_type)) {
+        printf("Segment %d CRC OK\n",r);
         ret = 2;
       }
       else {
@@ -1245,7 +1196,7 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
   printf("C %d\n",harq_process->C);
   */
   uint32_t wait = 0;
-  if (harq_process->C==2){
+  /*if (harq_process->C==2){
     while((proc->decoder_thread_available == 0) )
   {
           usleep(1);
@@ -1258,15 +1209,21 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
                   usleep(1);
                   wait++;
           }
-  }
+  }*/
+
+  /*notifiedFIFO_elt_t *res1=tryPullTpool(&nf, Tpool);
+  if (!res1) {
+	  printf("mthread trypull null\n");
+	  usleep(1);
+	  wait++;
+  }*/
 
   proc->decoder_main_available = 0;
+  Kr = harq_process->K; //to check if same K in all segments
+  Kr_bytes = Kr>>3;
   
   for (r=0; r<harq_process->C; r++) {
 
-    Kr = harq_process->K; //to check if same K in all segments
-    Kr_bytes = Kr>>3;
-
       memcpy(harq_process->b+offset,
                harq_process->c[r],
                Kr_bytes- - (harq_process->F>>3) -((harq_process->C>1)?3:0));
@@ -1286,27 +1243,20 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
   dlsch->last_iteration_cnt = ret;
   //proc->decoder_thread_available = 0;
   //proc->decoder_main_available = 0;
-  
-  //wait for worker to finish
-  //wait_on_busy_condition(&proc->mutex_td,&proc->cond_td,&proc->instance_cnt_dlsch td,"dlsch td thread");
-
-  //return( (ret>proc->tdp.ret) ? ret : proc->tdp.ret );
 
   return(ret);
 }
 #endif
 
 #ifdef UE_DLSCH_PARALLELISATION
-#define FIFO_PRIORITY   39
-void *nr_dlsch_decoding_2thread0(void *arg)
+void *nr_dlsch_decoding_process(void *arg)
 {
-    static __thread int UE_dlsch_td_retval;
-    struct nr_rxtx_thread_data *rtd = arg;
-    UE_nr_rxtx_proc_t *proc = rtd->proc;
-    PHY_VARS_NR_UE    *phy_vars_ue   = rtd->UE;
+	nr_rxtx_thread_data_t *rxtxD= (nr_rxtx_thread_data_t *)arg;
+    UE_nr_rxtx_proc_t *proc = &rxtxD->proc;
+    PHY_VARS_NR_UE    *phy_vars_ue   = rxtxD->UE;
     NR_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->frame_parms;
     int llr8_flag1;
-    int32_t no_iteration_ldpc;
+    int32_t no_iteration_ldpc,length_dec;
     t_nrLDPC_dec_params decParams;
     t_nrLDPC_dec_params* p_decParams = &decParams;
     t_nrLDPC_time_stats procTime;
@@ -1322,6 +1272,7 @@ void *nr_dlsch_decoding_2thread0(void *arg)
     uint8_t Ilbrm = 0;
     uint32_t Tbslbrm = 950984;
     uint16_t nb_rb = 30; //to update
+    double Coderate = 0.0;
     uint16_t nb_symb_sch = 12;
     uint8_t nb_re_dmrs = 6;
     uint16_t length_dmrs = 1;
@@ -1333,23 +1284,10 @@ void *nr_dlsch_decoding_2thread0(void *arg)
     __m128i *pl = (__m128i*)&l;
 
     proc->instance_cnt_dlsch_td=-1;
-    proc->nr_tti_rx=proc->sub_frame_start;
+    //proc->nr_tti_rx=proc->sub_frame_start;
 
-    proc->decoder_thread_available = 0;
+    proc->decoder_thread_available = 1;
     
-    char threadname[256];
-    sprintf(threadname,"UE_thread_dlsch_td_%d", proc->sub_frame_start);
-
-  cpu_set_t cpuset;
-    CPU_ZERO(&cpuset);
-    if ( (proc->sub_frame_start+1)%RX_NB_TH == 0 && threads.dlsch_td_one != -1 )
-      CPU_SET(threads.dlsch_td_one, &cpuset);
-    if ( (proc->sub_frame_start+1)%RX_NB_TH == 1 && threads.dlsch_td_two != -1 )
-      CPU_SET(threads.dlsch_td_two, &cpuset);
-    if ( (proc->sub_frame_start+1)%RX_NB_TH == 2 && threads.dlsch_td_three != -1 )
-      CPU_SET(threads.dlsch_td_three, &cpuset);
-
-
 
 #if UE_TIMING_TRACE
   time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
@@ -1367,45 +1305,21 @@ void *nr_dlsch_decoding_2thread0(void *arg)
   uint8_t Nl;
   //uint32_t Er;
 
-  init_thread(900000,1000000 , FIFO_PRIORITY-1, &cpuset, threadname);
-  while (!oai_exit) {
-
-    //proc->decoder_thread_available = 1;
-    
-          if (pthread_mutex_lock(&proc->mutex_dlsch_td) != 0) {
-              LOG_E( PHY, "[SCHED][UE] error locking mutex for UE dlsch td\n" );
-              exit_fun("nothing to add");
-          }
-          while (proc->instance_cnt_dlsch_td < 0) {
-              // most of the time, the thread is waiting here
-              pthread_cond_wait( &proc->cond_dlsch_td, &proc->mutex_dlsch_td );
-          }
-          if (pthread_mutex_unlock(&proc->mutex_dlsch_td) != 0) {
-              LOG_E( PHY, "[SCHED][UE] error unlocking mutex for UE dlsch_td \n" );
-              exit_fun("nothing to add");
-          }
+  int eNB_id                = proc->eNB_id;
+  int harq_pid              = proc->harq_pid;
+  llr8_flag1                = proc->llr8_flag;
+  int frame                 = proc->frame_rx;
+  int slot              = proc->nr_tti_rx;
+  r               			= proc->num_seg;
 
-          uint32_t wait = 0;
-          while(proc->decoder_main_available == 0)
-          {
-              usleep(1);
-              wait++;
-          }
+  NR_UE_DLSCH_t *dlsch      = phy_vars_ue->dlsch[phy_vars_ue->current_thread_id[slot]][eNB_id][0];
+  NR_DL_UE_HARQ_t *harq_process  = dlsch->harq_processes[harq_pid];
+  short *dlsch_llr        = phy_vars_ue->pdsch_vars[phy_vars_ue->current_thread_id[slot]][eNB_id]->llr[0];
+  //printf("2thread0 llr flag %d tdp flag %d\n",llr8_flag1, tdp->llr8_flag);
+  p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[r];
+  nb_symb_sch = harq_process->nb_symbols;
+  printf("dlsch decoding process frame %d slot %d segment %d r %d nb symb %d \n", frame, proc->nr_tti_rx, proc->num_seg, r, harq_process->nb_symbols);
 
-          //proc->decoder_thread_available = 0;
-          //PHY_VARS_NR_UE *phy_vars_ue       = tdp->UE;
-          int eNB_id                = proc->eNB_id;
-          int harq_pid              = proc->harq_pid;
-          llr8_flag1              = proc->llr8_flag;
-          //r_offset            = proc->Er;
-          //UE_rxtx_proc_t *proc        = tdp->proc;
-          int frame                       = proc->frame_rx;
-          int subframe              = proc->nr_tti_rx;
-          NR_UE_DLSCH_t *dlsch      = phy_vars_ue->dlsch[phy_vars_ue->current_thread_id[subframe]][eNB_id][0];
-          NR_DL_UE_HARQ_t *harq_process  = dlsch->harq_processes[harq_pid];
-          short *dlsch_llr        = phy_vars_ue->pdsch_vars[phy_vars_ue->current_thread_id[subframe]][eNB_id]->llr[0];
-          //printf("2thread0 llr flag %d tdp flag %d\n",llr8_flag1, tdp->llr8_flag);
-          p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[1];
 
   /*
   if (nb_rb > frame_parms->N_RB_DL) {
@@ -1428,10 +1342,14 @@ void *nr_dlsch_decoding_2thread0(void *arg)
 
   A = harq_process->TBS; //2072 for QPSK 1/3
 
+
   ret = dlsch->max_ldpc_iterations;
 
   harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, harq_process->Qm,harq_process->Nl);
   G = harq_process->G;
+
+  LOG_I(PHY,"DLSCH Decoding process, harq_pid %d TBS %d G %d mcs %d Nl %d nb_symb_sch %d nb_rb %d\n",harq_pid,A,G, harq_process->mcs, harq_process->Nl, nb_symb_sch,nb_rb);
+
     
   if (harq_process->round == 0) {
     // This is a new packet, so compute quantities regarding segmentation
@@ -1448,20 +1366,40 @@ void *nr_dlsch_decoding_2thread0(void *arg)
 
     }
 
-  kb = harq_process->K/harq_process->Z;
-    if ( kb==22){
-      p_decParams->BG = 1;
+  Coderate = (float) A /(float) G;
+  if ((A <=292) || ((A<=3824) && (Coderate <= 0.6667)) || Coderate <= 0.25)
+  {
+    p_decParams->BG = 2;
+    if (Coderate < 0.3333){
+      p_decParams->R = 15;
+      kc = 52;
+    }
+    else if (Coderate <0.6667){
       p_decParams->R = 13;
-      kc = 68;
+      kc = 32;
     }
-    else{
-      p_decParams->BG = 2;
+    else {
+      p_decParams->R = 23;
+      kc = 17;
+    }
+  }
+  else{
+    p_decParams->BG = 1;
+    if (Coderate < 0.6667){
       p_decParams->R = 13;
-      kc = 52;
-        }
+      kc = 68;
+    }
+    else if (Coderate <0.8889){
+      p_decParams->R = 23;
+      kc = 35;
+    }
+    else {
+      p_decParams->R = 89;
+      kc = 27;
+    }
+  }
 
-  p_decParams->numMaxIter = 2;
-  Kr = p_decParams->Z*kb;
+  p_decParams->numMaxIter = dlsch->max_ldpc_iterations;
   p_decParams->outMode= 0;
 
   /*
@@ -1511,8 +1449,9 @@ void *nr_dlsch_decoding_2thread0(void *arg)
     //  printf("thread0 r_offset %d\n",r_offset);
            
   //for (r=(harq_process->C/2); r<harq_process->C; r++) {
-         r=1; //(harq_process->C/2);
+     //    r=1; //(harq_process->C/2);
 
+  r_offset = r*r_offset;
 
   Kr = harq_process->K;
   Kr_bytes = Kr>>3;
@@ -1530,8 +1469,10 @@ void *nr_dlsch_decoding_2thread0(void *arg)
                            harq_process->w[r],
                            dlsch_llr+r_offset);
 
-    //for (int i =0; i<16; i++)
-    //          printf("rx output deinterleaving w[%d]= %d r_offset %d\n", i,harq_process->w[r][i], r_offset);
+#ifdef DEBUG_DLSCH_DECODING
+    for (int i =0; i<16; i++)
+              printf("rx output thread 0 deinterleaving w[%d]= %d r_offset %d\n", i,harq_process->w[r][i], r_offset);
+#endif
 
 #if UE_TIMING_TRACE
     stop_meas(dlsch_deinterleaving_stats);
@@ -1596,14 +1537,15 @@ void *nr_dlsch_decoding_2thread0(void *arg)
     //    printf("Clearing c, %p\n",harq_process->c[r]);
     memset(harq_process->c[r],0,Kr_bytes);
 
-    //    printf("done\n");
-    if (harq_process->C == 1)
+    if (harq_process->C == 1){
       crc_type = CRC24_A;
-    else
+      length_dec = harq_process->B;
+    }
+    else{
       crc_type = CRC24_B;
+      length_dec = (harq_process->B+24*harq_process->C)/harq_process->C;
+    }
 
-
-#if 1
     if (err_flag == 0) {
 /*
         LOG_I(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
@@ -1627,29 +1569,37 @@ void *nr_dlsch_decoding_2thread0(void *arg)
         //memset(pl,0,2*p_decParams->Z*sizeof(int8_t));
         memset((pv+K_bytes_F),127,harq_process->F*sizeof(int16_t));
 
-        for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F+((2*p_decParams->Z)>>3); i++, j++)
+        for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F; i++, j++)
         {
-          pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j]));
+          pv[i]= _mm_loadu_si128((__m128i*)(&harq_process->d[r][8*j]));
         }
 
-    for (i=Kr_bytes+((2*p_decParams->Z)>>3),j=Kr_bytes; i < ((kc*p_decParams->Z)>>3); i++, j++)
-            {
-              pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j]));
-            }
-
-    for (i=0, j=0; j < ((kc*p_decParams->Z)>>4);  i+=2, j++)
-                {
-
-                  pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
+        for (i=Kr_bytes,j=K_bytes_F-((2*p_decParams->Z)>>3); i < ((kc*p_decParams->Z)>>3); i++, j++)
+        {
+          pv[i]= _mm_loadu_si128((__m128i*)(&harq_process->d[r][8*j]));
+        }
 
-                }
+        for (i=0, j=0; j < ((kc*p_decParams->Z)>>4);  i+=2, j++)
+        {
+          pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
+        }
 
-    no_iteration_ldpc = nrLDPC_decoder(p_decParams,
+        no_iteration_ldpc = nrLDPC_decoder(p_decParams,
                (int8_t*)&pl[0],
                llrProcBuf,
                            p_nrLDPC_procBuf,                
                p_procTime);
 
+        // Fixme: correct type is unsigned, but nrLDPC_decoder and all called behind use signed int
+        if (check_crc((uint8_t*)llrProcBuf,length_dec,harq_process->F,crc_type)) {
+          printf("Segment %d CRC OK\n",r);
+          ret = 2;
+        }
+        else {
+          printf("CRC NOK\n");
+          ret = 1+dlsch->max_ldpc_iterations;
+        }
+
     if (no_iteration_ldpc > 10)
       printf("Error number of iteration LPDC %d\n", no_iteration_ldpc);
     //else
@@ -1691,552 +1641,46 @@ void *nr_dlsch_decoding_2thread0(void *arg)
     }
   //}
 
-  /*int32_t frame_rx_prev = frame;
-  int32_t subframe_rx_prev = subframe - 1;
-  if (subframe_rx_prev < 0) {
-    frame_rx_prev--;
-    subframe_rx_prev += 10;
-  }
-  frame_rx_prev = frame_rx_prev%1024;*/
-#if 0
-  if (err_flag == 1) {
-//#if UE_DEBUG_TRACE
-    LOG_I(PHY,"[UE %d] THREAD 0 DLSCH: Setting NAK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d) Kr %d r %d harq_process->round %d\n",
-        phy_vars_ue->Mod_id, frame, subframe, harq_pid,harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs,Kr,r,harq_process->round);
-//#endif
-    dlsch->harq_ack[subframe].ack = 0;
-    dlsch->harq_ack[subframe].harq_id = harq_pid;
-    dlsch->harq_ack[subframe].send_harq_status = 1;
-    harq_process->errors[harq_process->round]++;
-    harq_process->round++;
-
-
-    //    printf("Rate: [UE %d] DLSCH: Setting NACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round);
-    if (harq_process->round >= dlsch->Mdlharq) {
-      harq_process->status = SCH_IDLE;
-      harq_process->round  = 0;
-    }
-/*    if(is_crnti)
-    {
-    LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for subframe %d (pid %d, pid status %d, round %d/Max %d, TBS %d)\n",
-               phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->status,harq_process->round,dlsch->Mdlharq,harq_process->TBS);
-    }*/
-
-
-    //return((1+dlsch->max_ldpc_iterations));
-  } else {
-#if UE_DEBUG_TRACE
-      LOG_I(PHY,"[UE %d] THREAD 0 DLSCH: Setting ACK for subframe %d TBS %d mcs %d nb_rb %d\n",
-           phy_vars_ue->Mod_id,subframe,harq_process->TBS,harq_process->mcs,harq_process->nb_rb);
-#endif
+  proc->decoder_thread_available = 1;
+  //proc->decoder_main_available = 0;
 
-    harq_process->status = SCH_IDLE;
-    harq_process->round  = 0;
-    dlsch->harq_ack[subframe].ack = 1;
-    dlsch->harq_ack[subframe].harq_id = harq_pid;
-    dlsch->harq_ack[subframe].send_harq_status = 1;
-    //LOG_I(PHY,"[UE %d] DLSCH: Setting ACK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d)\n",
-      //  phy_vars_ue->Mod_id, frame, subframe, harq_pid, harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs);
+}
 
-/*    if(is_crnti)
-    {
-    LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round,harq_process->TBS);
-    }
-    LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round);
+void *dlsch_thread(void *arg) { // thread entry point; arg is cast to the PHY_VARS_NR_UE instance below
+  // NOTE(review): original note says this thread should be "over" the processing thread to keep real time — presumably a scheduling-priority requirement; confirm
+  PHY_VARS_NR_UE *UE = (PHY_VARS_NR_UE *) arg;
+  notifiedFIFO_t nf; // FIFO passed to every element created below and polled with tryPullTpool()
+  initNotifiedFIFO(&nf);
+  int nbDlProcessing=0; // incremented once per outer-loop pass, decremented for each element pulled back from nf
+  initNotifiedFIFO_nothreadSafe(&freeBlocks);
+
-  }*/
+  for (int i=0; i<RX_NB_TH_DL+1; i++) { // seed freeBlocks with RX_NB_TH_DL+1 elements bound to nr_dlsch_decoding_process
+    pushNotifiedFIFO_nothreadSafe(&freeBlocks,
+                                  newNotifiedFIFO_elt(sizeof(nr_rxtx_thread_data_t), 0,&nf,nr_dlsch_decoding_process));
+  }
+  printf("dlsch_thread\n"); // executed once, after the seeding loop has finished
+  displayList(&freeBlocks);
-  // Reassembly of Transport block here
-  offset = 0;
+  while (!oai_exit) {
 
-  /*
-  printf("harq_pid %d\n",harq_pid);
-  printf("F %d, Fbytes %d\n",harq_process->F,harq_process->F>>3);
-  printf("C %d\n",harq_process->C);
-  */
-  for (r=0; r<harq_process->C; r++) {
-    if (r<harq_process->Cminus)
-      Kr = harq_process->Kminus;
-    else
-      Kr = harq_process->Kplus;
+    notifiedFIFO_elt_t *res;
 
-    Kr_bytes = Kr>>3;
+    while (nbDlProcessing >= RX_NB_TH_DL && !oai_exit) { // stop waiting on shutdown so the thread can terminate
+      if ( (res=tryPullTpool(&nf, Tpool)) != NULL ) {
+        printf("dlsch thread trypull non null\n");
+        // res->msgData is the nr_rxtx_thread_data_t payload of the element pulled from nf; nothing is read from it here
+        nbDlProcessing--;
+        pushNotifiedFIFO_nothreadSafe(&freeBlocks,res);
+      }
+
-    //    printf("Segment %d : Kr= %d bytes\n",r,Kr_bytes);
-    if (r==0) {
-      memcpy(harq_process->b,
-             &harq_process->c[0][(harq_process->F>>3)],
-             Kr_bytes - (harq_process->F>>3)- ((harq_process->C>1)?3:0));
-      offset = Kr_bytes - (harq_process->F>>3) - ((harq_process->C>1)?3:0);
-      //            printf("copied %d bytes to b sequence (harq_pid %d)\n",
-      //          Kr_bytes - (harq_process->F>>3),harq_pid);
-      //          printf("b[0] = %x,c[%d] = %x\n",
-      //      harq_process->b[0],
-      //      harq_process->F>>3,
-      //      harq_process->c[0][(harq_process->F>>3)]);
-    } else {
-      memcpy(harq_process->b+offset,
-             harq_process->c[r],
-             Kr_bytes- ((harq_process->C>1)?3:0));
-      offset += (Kr_bytes - ((harq_process->C>1)?3:0));
+      usleep(200);
     }
-  }
-
-  dlsch->last_iteration_cnt = ret;
 
-  //return(ret);
-  }
-#endif
+    nbDlProcessing++;
+    //msgToPush->key=0;
+    //pushTpool(Tpool, msgToPush);
 
-  proc->decoder_thread_available = 1;
-  //proc->decoder_main_available = 0;
-  
-  if (pthread_mutex_lock(&proc->mutex_dlsch_td) != 0) {
-              LOG_E( PHY, "[SCHED][UE] error locking mutex for UE RXTX\n" );
-              exit_fun("noting to add");
-          }
-          proc->instance_cnt_dlsch_td--;
-          if (pthread_mutex_unlock(&proc->mutex_dlsch_td) != 0) {
-              LOG_E( PHY, "[SCHED][UE] error unlocking mutex for UE td1\n" );
-              exit_fun("noting to add");
-          }
-      }
+  } // while !oai_exit
+  return NULL; // no result to report; a void* thread entry must not fall off the end without a value
-      // thread finished
-          free(arg);
-          return &UE_dlsch_td_retval;
 }
-#endif
-
-#ifdef UE_DLSCH_PARALLELISATION
-#define FIFO_PRIORITY   39
-void *nr_dlsch_decoding_2thread1(void *arg)
-{
-  static __thread int UE_dlsch_td_retval1;
-    struct nr_rxtx_thread_data *rtd = arg;
-    UE_nr_rxtx_proc_t *proc = rtd->proc;
-    PHY_VARS_NR_UE    *phy_vars_ue   = rtd->UE;
-    NR_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->frame_parms;
 
-    int llr8_flag1;
-    int32_t no_iteration_ldpc;
-    t_nrLDPC_dec_params decParams;
-    t_nrLDPC_dec_params* p_decParams = &decParams;
-    t_nrLDPC_time_stats procTime;
-    t_nrLDPC_time_stats* p_procTime =&procTime ;
-    t_nrLDPC_procBuf* p_nrLDPC_procBuf;
-    int8_t llrProcBuf[OAI_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
-    int16_t z [68*384];
-    int8_t l [68*384];
-    //__m128i l;
-    int16_t inv_d [68*384];
-    //int16_t *p_invd =&inv_d;
-    uint8_t kb, kc;
-    uint8_t Ilbrm = 0;
-    uint32_t Tbslbrm = 950984;
-    uint16_t nb_rb = 30; //to update
-    uint16_t nb_symb_sch = 12;
-    uint8_t nb_re_dmrs = 6;
-    uint16_t length_dmrs = 1;
-
-    uint32_t i,j;
-    //uint32_t k;
-
-    __m128i *pv = (__m128i*)&z;
-    __m128i *pl = (__m128i*)&l;
-
-    proc->instance_cnt_dlsch_td1=-1;
-    proc->nr_tti_rx=proc->sub_frame_start;
-
-    printf("start thread 1\n");
-    proc->decoder_thread_available1 = 0;
-
-    char threadname[256];
-    sprintf(threadname,"UE_thread_dlsch_td1_%d", proc->sub_frame_start);
-
-  cpu_set_t cpuset;
-    CPU_ZERO(&cpuset);
-    if ( (proc->sub_frame_start+1)%RX_NB_TH == 0 && threads.dlsch_td_one != -1 )
-      CPU_SET(threads.dlsch_td_one, &cpuset);
-    if ( (proc->sub_frame_start+1)%RX_NB_TH == 1 && threads.dlsch_td_two != -1 )
-      CPU_SET(threads.dlsch_td_two, &cpuset);
-    if ( (proc->sub_frame_start+1)%RX_NB_TH == 2 && threads.dlsch_td_three != -1 )
-      CPU_SET(threads.dlsch_td_three, &cpuset);
-
-
-
-#if UE_TIMING_TRACE
-  time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
-  time_stats_t *dlsch_turbo_decoding_stats=&phy_vars_ue->dlsch_turbo_decoding_stats;
-  time_stats_t *dlsch_deinterleaving_stats=&phy_vars_ue->dlsch_deinterleaving_stats;
-#endif
-    uint32_t A,E;
-    uint32_t G;
-    uint32_t ret,offset;
-    uint32_t r,r_offset=0,Kr,Kr_bytes,err_flag=0,K_bytes_F;
-    uint8_t crc_type;
-    uint8_t C,Cprime;
-    uint8_t Qm;
-    uint8_t Nl;
-    //uint32_t Er;
-
-  init_thread(900000,1000000 , FIFO_PRIORITY-1, &cpuset, threadname);
-  printf("2thread1 oai_exit %d\n", oai_exit);
-  while (!oai_exit) {
-          if (pthread_mutex_lock(&proc->mutex_dlsch_td1) != 0) {
-              LOG_E( PHY, "[SCHED][UE] error locking mutex for UE dlsch td\n" );
-              exit_fun("nothing to add");
-          }
-          while (proc->instance_cnt_dlsch_td1 < 0) {
-              // most of the time, the thread is waiting here
-              pthread_cond_wait( &proc->cond_dlsch_td1, &proc->mutex_dlsch_td1 );
-          }
-          if (pthread_mutex_unlock(&proc->mutex_dlsch_td1) != 0) {
-              LOG_E( PHY, "[SCHED][UE] error unlocking mutex for UE dlsch_td \n" );
-              exit_fun("nothing to add");
-          }
-
-          //printf("2thread1 main available %d\n", proc->decoder_main_available);
-
-          uint32_t wait = 0;
-                      while(proc->decoder_main_available == 0)
-                      {
-                              usleep(1);
-                              wait++;
-                      }
-            
-          //proc->decoder_thread_available1 = 0;
-          //PHY_VARS_NR_UE *phy_vars_ue       = tdp->UE;
-          int eNB_id                = proc->eNB_id;
-          int harq_pid              = proc->harq_pid;
-          llr8_flag1              = proc->llr8_flag;
-          //r_offset            = proc->Er;
-          //UE_rxtx_proc_t *proc        = tdp->proc;
-          int frame                       = proc->frame_rx;
-          int subframe              = proc->nr_tti_rx;
-          NR_UE_DLSCH_t *dlsch      = phy_vars_ue->dlsch[phy_vars_ue->current_thread_id[subframe]][eNB_id][0];
-          NR_DL_UE_HARQ_t *harq_process  = dlsch->harq_processes[harq_pid];
-          short *dlsch_llr        = phy_vars_ue->pdsch_vars[phy_vars_ue->current_thread_id[subframe]][eNB_id]->llr[0];
-          //printf("2thread0 llr flag %d tdp flag %d\n",llr8_flag1, tdp->llr8_flag);
-          //printf("2thread1 nr_tti_tx %d subframe %d SF thread id %d r_offset %d\n", proc->nr_tti_rx, subframe, phy_vars_ue->current_thread_id[subframe], r_offset);
-          p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[2];
-
-  /*
-  if (nb_rb > frame_parms->N_RB_DL) {
-    printf("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb);
-    return(max_ldpc_iterations);
-    }*/
-
-  /*harq_pid = dlsch->current_harq_pid[phy_vars_ue->current_thread_id[subframe]];
-  if (harq_pid >= 8) {
-    printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid);
-    return(max_ldpc_iterations);
-  }
-  */
-
-  nb_rb = harq_process->nb_rb;
-
-  harq_process->trials[harq_process->round]++;
-  harq_process->TBS = nr_compute_tbs(harq_process->mcs,nb_rb,nb_symb_sch,nb_re_dmrs,length_dmrs, harq_process->Nl);
-
-  A = harq_process->TBS; //2072 for QPSK 1/3
-
-  ret = dlsch->max_ldpc_iterations;
-
-  harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, harq_process->Qm,harq_process->Nl);
-
-  G = harq_process->G;
-  //get_G(frame_parms,nb_rb,dlsch->rb_alloc,mod_order,num_pdcch_symbols,phy_vars_ue->frame,subframe);
-
-    //printf("DLSCH Decoding,  A %d harq_pid %d G %d\n",A, harq_pid,harq_process->G);
-
-  if (harq_process->round == 0) {
-    // This is a new packet, so compute quantities regarding segmentation
-    harq_process->B = A+24;
-    nr_segmentation(NULL,
-                    NULL,
-                    harq_process->B,
-                    &harq_process->C,
-                    &harq_process->K,
-              &harq_process->Z,
-                    &harq_process->F);
-                p_decParams->Z = harq_process->Z;
-
-    }
-
-    kb = harq_process->K/harq_process->Z;
-      if ( kb==22){
-        p_decParams->BG = 1;
-        p_decParams->R = 89;
-        kc = 68;
-      }
-      else{
-        p_decParams->BG = 2;
-        p_decParams->R = 13;
-        kc = 52;
-          }
-
-    p_decParams->numMaxIter = 2;
-    Kr = p_decParams->Z*kb;
-    p_decParams->outMode= 0;
-  /*
-  else {
-    printf("dlsch_decoding.c: Ndi>0 not checked yet!!\n");
-    return(max_ldpc_iterations);
-  }
-  */
-  err_flag = 0;
-  //r_offset = 0;
-
-  /*
-  unsigned char bw_scaling =1;
-
-  switch (frame_parms->N_RB_DL) {
-  case 106:
-    bw_scaling =2;
-    break;
-
-  default:
-    bw_scaling =1;
-    break;
-  }
-
-  if (harq_process->C > MAX_NUM_DLSCH_SEGMENTS/bw_scaling) {
-    LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,MAX_NUM_DLSCH_SEGMENTS/bw_scaling);
-    return((1+dlsch->max_ldpc_iterations));
-  }*/
-#ifdef DEBUG_DLSCH_DECODING
-  printf("Segmentation: C %d, Cminus %d, Kminus %d, Kplus %d\n",harq_process->C,harq_process->Cminus,harq_process->Kminus,harq_process->Kplus);
-#endif
-
-  opp_enabled=1;
-  
-  Qm= harq_process->Qm;
-  Nl=harq_process->Nl;
-  //r_thread = harq_process->C/2-1;
-  C= harq_process->C;
-
-  Cprime = C; //assume CBGTI not present
-
-  if (r <= Cprime - ((G/(Nl*Qm))%Cprime) - 1)
-    r_offset = Nl*Qm*(G/(Nl*Qm*Cprime));
-  else
-    r_offset = Nl*Qm*((G/(Nl*Qm*Cprime))+1);
-
-           //printf("sub thread r_offset %d\n", r_offset);
-
-  //for (r=(harq_process->C/2); r<harq_process->C; r++) {
-         r=2; //(harq_process->C/2);
-       r_offset = r*r_offset;
-   //printf("thread1 r=%d r_offset %d \n",r, r_offset);
-
-
-      Kr = harq_process->K;
-      Kr_bytes = Kr>>3;
-      K_bytes_F = Kr_bytes-(harq_process->F>>3);
-
-      Tbslbrm = nr_compute_tbs(28,nb_rb,frame_parms->symbols_per_slot,0,0, harq_process->Nl);
-
-    E = nr_get_E(G, harq_process->C, harq_process->Qm, harq_process->Nl, r);
-
-    /*
-    printf("Subblock deinterleaving, d %p w %p\n",
-     harq_process->d[r],
-     harq_process->w);
-    */
-#if UE_TIMING_TRACE
-    start_meas(dlsch_deinterleaving_stats);
-#endif
-    nr_deinterleaving_ldpc(E,
-                           harq_process->Qm,
-                           harq_process->w[r],
-                           dlsch_llr+r_offset);
-
-    //for (int i =0; i<16; i++)
-    //          printf("rx output deinterleaving w[%d]= %d r_offset %d\n", i,harq_process->w[r][i], r_offset);
-
-#if UE_TIMING_TRACE
-    stop_meas(dlsch_deinterleaving_stats);
-#endif
-
-#if UE_TIMING_TRACE
-    start_meas(dlsch_rate_unmatching_stats);
-#endif
-
-#ifdef DEBUG_DLSCH_DECODING
-    LOG_D(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n",
-          harq_pid,r, G,
-          Kr*3,
-          harq_process->TBS,
-          harq_process->Qm,
-          harq_process->nb_rb,
-          harq_process->Nl,
-          harq_process->rvidx,
-          harq_process->round);
-#endif
-
-    if (nr_rate_matching_ldpc_rx(Ilbrm,
-             Tbslbrm,
-         p_decParams->BG,
-         p_decParams->Z,
-         harq_process->d[r],
-         harq_process->w[r],
-         harq_process->C,
-         harq_process->rvidx,
-         (harq_process->round==0)?1:0,
-         E)==-1) {
-#if UE_TIMING_TRACE
-      stop_meas(dlsch_rate_unmatching_stats);
-#endif
-      LOG_E(PHY,"dlsch_decoding.c: Problem in rate_matching\n");
-      //return(dlsch->max_ldpc_iterations);
-    } else
-    {
-#if UE_TIMING_TRACE
-      stop_meas(dlsch_rate_unmatching_stats);
-#endif
-    }
-
-    //for (int i =0; i<16; i++)
-    //      printf("rx output ratematching d[%d]= %d r_offset %d\n", i,harq_process->d[r][i], r_offset);
-
-    //r_offset += E;
-
-#ifdef DEBUG_DLSCH_DECODING
-    if (r==0) {
-              write_output("decoder_llr.m","decllr",dlsch_llr,G,1,0);
-              write_output("decoder_in.m","dec",&harq_process->d[0][0],(3*8*Kr_bytes)+12,1,0);
-    }
-
-    printf("decoder input(segment %d) :",r);
-    int i; for (i=0;i<(3*8*Kr_bytes)+12;i++)
-      printf("%d : %d\n",i,harq_process->d[r][i]);
-      printf("\n");
-#endif
-
-
-    //    printf("Clearing c, %p\n",harq_process->c[r]);
-    memset(harq_process->c[r],0,Kr_bytes);
-
-    //    printf("done\n");
-    if (harq_process->C == 1)
-      crc_type = CRC24_A;
-    else
-      crc_type = CRC24_B;
-
-    if (err_flag == 0) {
-/*
-        LOG_I(PHY, "turbo algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
-                            Kr,r,harq_process->C,harq_process->nb_rb,crc_type,A,harq_process->TBS,
-                            harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_ldpc_iterations);
-*/
-      if (llr8_flag1) {
-        AssertFatal (Kr >= 256, "turbo algo issue Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d\n",
-            Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS,harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round);
-      }
-#if UE_TIMING_TRACE
-        start_meas(dlsch_turbo_decoding_stats);
-#endif
-//      LOG_D(PHY,"AbsSubframe %d.%d Start turbo segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1);
-
-    memset(pv,0,2*p_decParams->Z*sizeof(int16_t));
-        //memset(pl,0,2*p_decParams->Z*sizeof(int8_t));
-    memset((pv+K_bytes_F),127,harq_process->F*sizeof(int16_t));
-
-        for (i=((2*p_decParams->Z)>>3), j = 0; i < K_bytes_F+((2*p_decParams->Z)>>3); i++, j++)
-        {
-          pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j]));
-        }
-
-    for (i=Kr_bytes+((2*p_decParams->Z)>>3),j=Kr_bytes; i < ((kc*p_decParams->Z)>>3); i++, j++)
-            {
-              pv[i]= _mm_loadu_si128((__m128i*)(&inv_d[8*j]));
-            }
-
-    for (i=0, j=0; j < ((kc*p_decParams->Z)>>4);  i+=2, j++)
-                {
-
-                  pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
-
-                }
-
-    no_iteration_ldpc = nrLDPC_decoder(p_decParams,
-               (int8_t*)&pl[0],
-               llrProcBuf,
-                           p_nrLDPC_procBuf,                
-               p_procTime);
-
-    if (no_iteration_ldpc > 10)
-      printf("Error number of iteration LPDC %d\n", no_iteration_ldpc);
-    //else
-    //  printf("OK number of iteration LPDC %d\n", no_iteration_ldpc);
-
-    for (int m=0; m < Kr>>3; m ++)
-                    {
-                  harq_process->c[r][m]= (uint8_t) llrProcBuf[m];
-                    }
-
-            /*for (int u=0; u < Kr>>3; u ++)
-                            {
-                      ullrProcBuf[u]= (uint8_t) llrProcBuf[u];
-                            }
-
-
-            printf("output unsigned ullrProcBuf \n");
-
-            for (int j=0; j < Kr>>3; j ++)
-                                    {
-
-                              printf(" %d \n", ullrProcBuf[j]);
-
-                                    }
-          printf(" \n");*/
-#endif
-    //printf("output channel decoder %d %d %d %d %d \n", harq_process->c[r][0], harq_process->c[r][1], harq_process->c[r][2],harq_process->c[r][3], harq_process->c[r][4]);
-
-     //printf("output decoder %d %d %d %d %d \n", harq_process->c[r][0], harq_process->c[r][1], harq_process->c[r][2],harq_process->c[r][3], harq_process->c[r][4]);
-
-#if UE_TIMING_TRACE
-      stop_meas(dlsch_turbo_decoding_stats);
-#endif
-    }
-
-    if ((err_flag == 0) && (ret>=(1+dlsch->max_ldpc_iterations))) {// a Code segment is in error so break;
-//      LOG_D(PHY,"AbsSubframe %d.%d CRC failed, segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1);
-      err_flag = 1;
-    }
-  //}
-
-  /*int32_t frame_rx_prev = frame;
-  int32_t subframe_rx_prev = subframe - 1;
-  if (subframe_rx_prev < 0) {
-    frame_rx_prev--;
-    subframe_rx_prev += 10;
-  }
-  frame_rx_prev = frame_rx_prev%1024;*/
-
-
-  proc->decoder_thread_available1 = 1;
-  //proc->decoder_main_available = 0;
-
-  //printf("2thread1 proc->instance_cnt_dlsch_td1 %d\n", proc->instance_cnt_dlsch_td1);
-
-  if (pthread_mutex_lock(&proc->mutex_dlsch_td1) != 0) {
-              LOG_E( PHY, "[SCHED][UE] error locking mutex for UE RXTX\n" );
-              exit_fun("noting to add");
-          }
-          proc->instance_cnt_dlsch_td1--;
-          if (pthread_mutex_unlock(&proc->mutex_dlsch_td1) != 0) {
-              LOG_E( PHY, "[SCHED][UE] error unlocking mutex for UE td1\n" );
-              exit_fun("noting to add");
-          }
-          //printf("end 2thread1 proc->instance_cnt_dlsch_td1 %d\n", proc->instance_cnt_dlsch_td1);
-      }
-
-  //printf("after 2thread1 after oai exit proc->instance_cnt_dlsch_td %d\n", proc->instance_cnt_dlsch_td1);
-      // thread finished
-          free(arg);
-          return &UE_dlsch_td_retval1;
-}
 #endif
-
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
index b6827371801..155e08d4774 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
@@ -1818,6 +1818,6 @@ int nr_extract_dci_info(PHY_VARS_NR_UE *ue,
 			uint16_t n_RB_DLBWP,
 			uint16_t crc_scrambled_values[TOTAL_NBR_SCRAMBLED_VALUES]);
 
-
+void *dlsch_thread(void *arg);
 /**@}*/
 #endif
diff --git a/openair1/PHY/defs_common.h b/openair1/PHY/defs_common.h
index 3216297f59f..c9319b1ac57 100644
--- a/openair1/PHY/defs_common.h
+++ b/openair1/PHY/defs_common.h
@@ -80,6 +80,7 @@
 
 #define RX_NB_TH_MAX 2
 #define RX_NB_TH 2
+#define RX_NB_TH_DL 2
 
 #define LTE_SLOTS_PER_SUBFRAME 2
 
diff --git a/openair1/PHY/defs_nr_UE.h b/openair1/PHY/defs_nr_UE.h
index 5376a427705..2fc7b784756 100644
--- a/openair1/PHY/defs_nr_UE.h
+++ b/openair1/PHY/defs_nr_UE.h
@@ -1221,10 +1221,10 @@ typedef struct {
 /* this structure is used to pass both UE phy vars and
  * proc to the function UE_thread_rxn_txnp4
  */
-struct nr_rxtx_thread_data {
+typedef struct nr_rxtx_thread_data_s {
+  UE_nr_rxtx_proc_t proc;
   PHY_VARS_NR_UE    *UE;
-  UE_nr_rxtx_proc_t *proc;
-};
+}  nr_rxtx_thread_data_t;
 
 /*static inline int wait_on_condition(pthread_mutex_t *mutex,pthread_cond_t *cond,int *instance_cnt,char *name) {
 
diff --git a/openair1/PHY/thread_NR_UE.h b/openair1/PHY/thread_NR_UE.h
index b18ff3d5b7d..94271ed55d3 100644
--- a/openair1/PHY/thread_NR_UE.h
+++ b/openair1/PHY/thread_NR_UE.h
@@ -60,7 +60,7 @@ typedef struct {
   uint8_t decoder_thread_available;
   uint8_t decoder_main_available;
   uint8_t decoder_switch;
-  int counter_decoder;
+  int num_seg;
   uint8_t channel_level;
   int eNB_id;
   int harq_pid;
diff --git a/openair1/SCHED_NR_UE/defs.h b/openair1/SCHED_NR_UE/defs.h
index 28bb2c50645..63526f15b85 100644
--- a/openair1/SCHED_NR_UE/defs.h
+++ b/openair1/SCHED_NR_UE/defs.h
@@ -44,7 +44,7 @@
 #define OPENAIR_THREAD_STACK_SIZE     PTHREAD_STACK_MIN //4096 //RTL_PTHREAD_STACK_MIN*6
 //#define DLC_THREAD_STACK_SIZE        4096 //DLC stack size
 //#define UE_SLOT_PARALLELISATION
-//#define UE_DLSCH_PARALLELISATION
+#define UE_DLSCH_PARALLELISATION//
 
 /*enum openair_SCHED_STATUS {
   openair_SCHED_STOPPED=1,
-- 
GitLab