diff --git a/ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf b/ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf
index 0e8466e2dff4f0764b5884dc99276e45e0630da5..e8be5a6ef0ae7d41f6e2b90f03f043df30d46f50 100644
--- a/ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf
+++ b/ci-scripts/conf_files/gnb.band78.sa.fr1.106PRB.2x2.usrpn310.conf
@@ -263,7 +263,7 @@ L1s = (
       {
       num_cc = 1;
       tr_n_preference = "local_mac";
-      pusch_proc_threads = 2;
+      pusch_proc_threads = 4;
       prach_dtx_threshold = 120;
 #      pucch0_dtx_threshold = 150;
       }
diff --git a/ci-scripts/conf_files/gnb.band78.tm1.fr1.106PRB.usrpb210.conf b/ci-scripts/conf_files/gnb.band78.tm1.fr1.106PRB.usrpb210.conf
index a72147ee99663b553aae96aee9c476366a45927a..015d9ee0d0ac50f8489940c4851da7ef4a81ac01 100644
--- a/ci-scripts/conf_files/gnb.band78.tm1.fr1.106PRB.usrpb210.conf
+++ b/ci-scripts/conf_files/gnb.band78.tm1.fr1.106PRB.usrpb210.conf
@@ -250,6 +250,7 @@ RUs = (
     max_pdschReferenceSignalPower = -27;
     max_rxgain     = 111;
     eNB_instances  = [0];
+#    sdr_addrs = "serial=30C51D4";
 #    clock_src      = "external";
   }
 );  
diff --git a/ci-scripts/sshconnection.py b/ci-scripts/sshconnection.py
index 65e9c961d0a00a00e5a0f48cdc719aeac4bca98d..67e0b832e66210e359690ff0a388c5013c3d1c5c 100644
--- a/ci-scripts/sshconnection.py
+++ b/ci-scripts/sshconnection.py
@@ -100,7 +100,7 @@ class SSHConnection():
 				time.sleep(1)
 			count += 1
 		if connect_status:
-			pass
+			self.command('unset HISTFILE', '\$', 5, silent=True)
 		else:
 			sys.exit('SSH Connection Failed')
 		self.ipaddress = ipaddress
diff --git a/ci-scripts/xml_files/fr1_nsa_quectel.xml b/ci-scripts/xml_files/fr1_nsa_quectel.xml
index ba1e478efcf795dd231c64317ece6d30fd59af11..2b64b9bbe5e08d7a72929b423ce37b7d42eac0f5 100644
--- a/ci-scripts/xml_files/fr1_nsa_quectel.xml
+++ b/ci-scripts/xml_files/fr1_nsa_quectel.xml
@@ -73,7 +73,7 @@
 	<testCase id="040000">
 		<class>Initialize_eNB</class>
 		<desc>Initialize gNB</desc>
-		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.tm1.fr1.106PRB.usrpb210.conf -E -q --usrp-args "serial=30C51D4"</Initialize_eNB_args>
+		<Initialize_eNB_args>-O ci-scripts/conf_files/gnb.band78.tm1.fr1.106PRB.usrpb210.conf -E -q --RUs.[0].sdr_addrs "serial=30C51D4"</Initialize_eNB_args>
 		<eNB_instance>1</eNB_instance>
 		<eNB_serverId>1</eNB_serverId>
 		<air_interface>nr</air_interface>
diff --git a/ci-scripts/yaml_files/nsa_b200_gnb/docker-compose.yml b/ci-scripts/yaml_files/nsa_b200_gnb/docker-compose.yml
index c00cf0d2c224ca5455e1d000b7c874a0854c10ee..4ae51158ac2a0783c7627b6769f16788e6169f1e 100644
--- a/ci-scripts/yaml_files/nsa_b200_gnb/docker-compose.yml
+++ b/ci-scripts/yaml_files/nsa_b200_gnb/docker-compose.yml
@@ -26,7 +26,7 @@ services:
             FLEXRAN_INTERFACE_NAME: eth0
             FLEXRAN_IPV4_ADDRESS: 192.168.18.210
             THREAD_PARALLEL_CONFIG: PARALLEL_RU_L1_TRX_SPLIT
-            USE_ADDITIONAL_OPTIONS: '-E -q --usrp-args "serial=30C51D4"'
+            USE_ADDITIONAL_OPTIONS: -E -q --RUs.[0].sdr_addrs serial=30C51D4
         volumes:
             - /dev:/dev
         networks:
diff --git a/common/utils/T/T_messages.txt b/common/utils/T/T_messages.txt
index cd66be73f5ac650da9b6a25abfb44b1447b2f7c0..192bf8cc371ff0da11ddbfe3304d8ebf1407320a 100644
--- a/common/utils/T/T_messages.txt
+++ b/common/utils/T/T_messages.txt
@@ -155,6 +155,10 @@ ID = GNB_MAC_DL_RAR_PDU_WITH_DATA
     DESC = NR MAC downlink PDU for RAR
     GROUP = ALL:MAC:GNB:WIRESHARK
     FORMAT = int,gNB_ID : int,CC_id : int,rnti : int,frame : int,slot : int,harq_pid : buffer,data
+ID = GNB_MAC_RETRANSMISSION_DL_PDU_WITH_DATA
+    DESC = NR MAC downlink retransmitted PDU for an UE
+    GROUP = ALL:MAC:GNB:WIRESHARK
+    FORMAT = int,gNB_ID : int,CC_id : int,rnti : int,frame : int,slot : int,harq_pid : int,round : buffer,data
 
 #RLC logs
 ID = ENB_RLC_DL
diff --git a/common/utils/T/tracer/macpdu2wireshark.c b/common/utils/T/tracer/macpdu2wireshark.c
index d04f2c317d8c817b91ccb078264f4a3443549eb9..b9284748e411388d97cdec713bfc5e26659aa2dd 100644
--- a/common/utils/T/tracer/macpdu2wireshark.c
+++ b/common/utils/T/tracer/macpdu2wireshark.c
@@ -59,12 +59,20 @@ typedef struct {
   int nr_ul_rnti;
   int nr_ul_frame;
   int nr_ul_slot;
+  int nr_ul_harq_pid;
   int nr_ul_data;
   /* NR dl */
   int nr_dl_rnti;
   int nr_dl_frame;
   int nr_dl_slot;
+  int nr_dl_harq_pid;
   int nr_dl_data;
+  /* NR dl retx */
+  int nr_dl_retx_rnti;
+  int nr_dl_retx_frame;
+  int nr_dl_retx_slot;
+  int nr_dl_retx_harq_pid;
+  int nr_dl_retx_data;
   /* NR mib */
   int nr_mib_frame;
   int nr_mib_slot;
@@ -254,6 +262,7 @@ void sr(void *_d, event e)
 #define MAC_NR_PAYLOAD_TAG    0x01
 #define MAC_NR_RNTI_TAG       0x02
 #define MAC_NR_UEID_TAG       0x03
+#define MAC_NR_HARQID         0x06
 #define MAC_NR_FRAME_SLOT_TAG 0x07
 
 #define NR_FDD_RADIO 1
@@ -267,7 +276,8 @@ void sr(void *_d, event e)
 #define NR_C_RNTI  3
 
 void trace_nr(ev_data *d, int direction, int rnti_type, int rnti,
-        int frame, int slot, void *buf, int bufsize, int preamble)
+        int frame, int slot, int harq_pid, void *buf, int bufsize,
+        int preamble)
 {
   ssize_t ret;
   int i;
@@ -307,6 +317,8 @@ void trace_nr(ev_data *d, int direction, int rnti_type, int rnti,
   PUTC(&d->buf, frame & 255);
   PUTC(&d->buf, (slot>>8) & 255);
   PUTC(&d->buf, slot & 255);
+  PUTC(&d->buf, MAC_NR_HARQID);
+  PUTC(&d->buf, harq_pid);
 #endif
 
   PUTC(&d->buf, MAC_NR_PAYLOAD_TAG);
@@ -326,7 +338,8 @@ void nr_ul(void *_d, event e)
 
   trace_nr(d, NR_DIRECTION_UPLINK, NR_C_RNTI, e.e[d->nr_ul_rnti].i,
            e.e[d->nr_ul_frame].i, e.e[d->nr_ul_slot].i,
-           e.e[d->nr_ul_data].b, e.e[d->nr_ul_data].bsize, NO_PREAMBLE);
+           e.e[d->nr_ul_harq_pid].i, e.e[d->nr_ul_data].b,
+           e.e[d->nr_ul_data].bsize, NO_PREAMBLE);
 }
 
 void nr_dl(void *_d, event e)
@@ -335,7 +348,18 @@ void nr_dl(void *_d, event e)
 
   trace_nr(d, NR_DIRECTION_DOWNLINK, NR_C_RNTI, e.e[d->nr_dl_rnti].i,
            e.e[d->nr_dl_frame].i, e.e[d->nr_dl_slot].i,
-           e.e[d->nr_dl_data].b, e.e[d->nr_dl_data].bsize, NO_PREAMBLE);
+           e.e[d->nr_dl_harq_pid].i, e.e[d->nr_dl_data].b,
+           e.e[d->nr_dl_data].bsize, NO_PREAMBLE);
+}
+
+void nr_dl_retx(void *_d, event e) /* handler registered in main() for GNB_MAC_RETRANSMISSION_DL_PDU_WITH_DATA */
+{
+  ev_data *d = _d;
+  /* trace the retransmitted PDU as downlink / C-RNTI / no preamble; the event's 'round' field is not forwarded */
+  trace_nr(d, NR_DIRECTION_DOWNLINK, NR_C_RNTI, e.e[d->nr_dl_retx_rnti].i,
+           e.e[d->nr_dl_retx_frame].i, e.e[d->nr_dl_retx_slot].i,
+           e.e[d->nr_dl_retx_harq_pid].i, e.e[d->nr_dl_retx_data].b,
+           e.e[d->nr_dl_retx_data].bsize, NO_PREAMBLE);
 }
 
 void nr_mib(void *_d, event e)
@@ -349,7 +373,7 @@ void nr_mib(void *_d, event e)
   d->cur_mib++;
 
   trace_nr(d, NR_DIRECTION_DOWNLINK, NR_NO_RNTI, 0,
-           e.e[d->nr_mib_frame].i, e.e[d->nr_mib_slot].i,
+           e.e[d->nr_mib_frame].i, e.e[d->nr_mib_slot].i, 0 /* harq pid */,
            e.e[d->nr_mib_data].b, e.e[d->nr_mib_data].bsize, NO_PREAMBLE);
 }
 
@@ -358,7 +382,7 @@ void nr_rar(void *_d, event e)
   ev_data *d = _d;
 
   trace_nr(d, NR_DIRECTION_DOWNLINK, NR_RA_RNTI, e.e[d->nr_rar_rnti].i,
-           e.e[d->nr_rar_frame].i, e.e[d->nr_rar_slot].i,
+           e.e[d->nr_rar_frame].i, e.e[d->nr_rar_slot].i, 0 /* harq pid */,
            e.e[d->nr_rar_data].b, e.e[d->nr_rar_data].bsize, NO_PREAMBLE);
 }
 
@@ -367,47 +391,55 @@ void nr_rar(void *_d, event e)
 
 void setup_data(ev_data *d, void *database, int ul_id, int dl_id, int mib_id,
                 int preamble_id, int rar_id, int sr_id,
-                int nr_ul_id, int nr_dl_id, int nr_mib_id, int nr_rar_id)
+                int nr_ul_id, int nr_dl_id, int nr_dl_retx_id, int nr_mib_id,
+                int nr_rar_id)
 {
   database_event_format f;
   int i;
-  d->ul_rnti           = -1;
-  d->ul_frame          = -1;
-  d->ul_subframe       = -1;
-  d->ul_data           = -1;
-  d->dl_rnti           = -1;
-  d->dl_frame          = -1;
-  d->dl_subframe       = -1;
-  d->dl_data           = -1;
-  d->mib_frame         = -1;
-  d->mib_subframe      = -1;
-  d->mib_data          = -1;
-  d->preamble_frame    = -1;
-  d->preamble_subframe = -1;
-  d->preamble_preamble = -1;
-  d->rar_rnti          = -1;
-  d->rar_frame         = -1;
-  d->rar_subframe      = -1;
-  d->rar_data          = -1;
-  d->sr_rnti           = -1;
-  d->sr_frame          = -1;
-  d->sr_subframe       = -1;
-
-  d->nr_ul_rnti        = -1;
-  d->nr_ul_frame       = -1;
-  d->nr_ul_slot        = -1;
-  d->nr_ul_data        = -1;
-  d->nr_dl_rnti        = -1;
-  d->nr_dl_frame       = -1;
-  d->nr_dl_slot        = -1;
-  d->nr_dl_data        = -1;
-  d->nr_mib_frame      = -1;
-  d->nr_mib_slot       = -1;
-  d->nr_mib_data       = -1;
-  d->nr_rar_rnti       = -1;
-  d->nr_rar_frame      = -1;
-  d->nr_rar_slot       = -1;
-  d->nr_rar_data       = -1;
+  d->ul_rnti             = -1;
+  d->ul_frame            = -1;
+  d->ul_subframe         = -1;
+  d->ul_data             = -1;
+  d->dl_rnti             = -1;
+  d->dl_frame            = -1;
+  d->dl_subframe         = -1;
+  d->dl_data             = -1;
+  d->mib_frame           = -1;
+  d->mib_subframe        = -1;
+  d->mib_data            = -1;
+  d->preamble_frame      = -1;
+  d->preamble_subframe   = -1;
+  d->preamble_preamble   = -1;
+  d->rar_rnti            = -1;
+  d->rar_frame           = -1;
+  d->rar_subframe        = -1;
+  d->rar_data            = -1;
+  d->sr_rnti             = -1;
+  d->sr_frame            = -1;
+  d->sr_subframe         = -1;
+
+  d->nr_ul_rnti          = -1;
+  d->nr_ul_frame         = -1;
+  d->nr_ul_slot          = -1;
+  d->nr_ul_harq_pid      = -1;
+  d->nr_ul_data          = -1;
+  d->nr_dl_rnti          = -1;
+  d->nr_dl_frame         = -1;
+  d->nr_dl_slot          = -1;
+  d->nr_dl_harq_pid      = -1;
+  d->nr_dl_data          = -1;
+  d->nr_dl_retx_rnti     = -1;
+  d->nr_dl_retx_frame    = -1;
+  d->nr_dl_retx_slot     = -1;
+  d->nr_dl_retx_harq_pid = -1;
+  d->nr_dl_retx_data     = -1;
+  d->nr_mib_frame        = -1;
+  d->nr_mib_slot         = -1;
+  d->nr_mib_data         = -1;
+  d->nr_rar_rnti         = -1;
+  d->nr_rar_frame        = -1;
+  d->nr_rar_slot         = -1;
+  d->nr_rar_data         = -1;
 
 #define G(var_name, var_type, var) \
   if (!strcmp(f.name[i], var_name)) { \
@@ -490,32 +522,50 @@ void setup_data(ev_data *d, void *database, int ul_id, int dl_id, int mib_id,
   if (d->sr_rnti == -1 || d->sr_frame == -1 || d->sr_subframe == -1)
     goto error;
 
-  /* NR ul: rnti, frame, slot, data */
+  /* NR ul: rnti, frame, slot, harq_pid, data */
   f = get_format(database, nr_ul_id);
 
   for (i = 0; i < f.count; i++) {
-    G("rnti",  "int",    d->nr_ul_rnti);
-    G("frame", "int",    d->nr_ul_frame);
-    G("slot",  "int",    d->nr_ul_slot);
-    G("data",  "buffer", d->nr_ul_data);
+    G("rnti",     "int",    d->nr_ul_rnti);
+    G("frame",    "int",    d->nr_ul_frame);
+    G("slot",     "int",    d->nr_ul_slot);
+    G("harq_pid", "int",    d->nr_ul_harq_pid);
+    G("data",     "buffer", d->nr_ul_data);
   }
 
   if (d->nr_ul_rnti == -1 || d->nr_ul_frame == -1 || d->nr_ul_slot == -1 ||
-      d->nr_ul_data == -1)
+      d->nr_ul_harq_pid == -1 || d->nr_ul_data == -1)
     goto error;
 
-  /* NR dl: rnti, frame, slot, data */
+  /* NR dl: rnti, frame, slot, harq_pid, data */
   f = get_format(database, nr_dl_id);
 
   for (i = 0; i < f.count; i++) {
-    G("rnti",  "int",    d->nr_dl_rnti);
-    G("frame", "int",    d->nr_dl_frame);
-    G("slot",  "int",    d->nr_dl_slot);
-    G("data",  "buffer", d->nr_dl_data);
+    G("rnti",     "int",    d->nr_dl_rnti);
+    G("frame",    "int",    d->nr_dl_frame);
+    G("slot",     "int",    d->nr_dl_slot);
+    G("harq_pid", "int",    d->nr_dl_harq_pid);
+    G("data",     "buffer", d->nr_dl_data);
   }
 
   if (d->nr_dl_rnti == -1 || d->nr_dl_frame == -1 || d->nr_dl_slot == -1 ||
-      d->nr_dl_data == -1)
+      d->nr_dl_harq_pid == -1 || d->nr_dl_data == -1)
+    goto error;
+
+  /* NR dl retx: rnti, frame, slot, harq_pid, data */
+  f = get_format(database, nr_dl_retx_id);
+
+  for (i = 0; i < f.count; i++) {
+    G("rnti",     "int",    d->nr_dl_retx_rnti);
+    G("frame",    "int",    d->nr_dl_retx_frame);
+    G("slot",     "int",    d->nr_dl_retx_slot);
+    G("harq_pid", "int",    d->nr_dl_retx_harq_pid);
+    G("data",     "buffer", d->nr_dl_retx_data);
+  }
+
+  if (d->nr_dl_retx_rnti == -1 || d->nr_dl_retx_frame == -1 ||
+      d->nr_dl_retx_slot == -1 || d->nr_dl_retx_harq_pid == -1 ||
+      d->nr_dl_retx_data == -1)
     goto error;
 
   /* NR MIB: frame, slot, data */
@@ -612,7 +662,7 @@ int main(int n, char **v)
   int in;
   int i;
   int ul_id, dl_id, mib_id, preamble_id, rar_id;
-  int nr_ul_id, nr_dl_id, nr_mib_id, nr_rar_id;
+  int nr_ul_id, nr_dl_id, nr_dl_retx_id, nr_mib_id, nr_rar_id;
   int sr_id;
   ev_data d;
   char *ip = DEFAULT_IP;
@@ -692,6 +742,7 @@ int main(int n, char **v)
 
     on_off(database, "GNB_MAC_UL_PDU_WITH_DATA", is_on, 1);
     on_off(database, "GNB_MAC_DL_PDU_WITH_DATA", is_on, 1);
+    on_off(database, "GNB_MAC_RETRANSMISSION_DL_PDU_WITH_DATA", is_on, 1);
     on_off(database, "GNB_PHY_MIB", is_on, 1);
     on_off(database, "GNB_MAC_DL_RAR_PDU_WITH_DATA", is_on, 1);
 
@@ -715,11 +766,12 @@ int main(int n, char **v)
 
   nr_ul_id = event_id_from_name(database, "GNB_MAC_UL_PDU_WITH_DATA");
   nr_dl_id = event_id_from_name(database, "GNB_MAC_DL_PDU_WITH_DATA");
+  nr_dl_retx_id = event_id_from_name(database, "GNB_MAC_RETRANSMISSION_DL_PDU_WITH_DATA");
   nr_mib_id = event_id_from_name(database, "GNB_PHY_MIB");
   nr_rar_id = event_id_from_name(database, "GNB_MAC_DL_RAR_PDU_WITH_DATA");
 
   setup_data(&d, database, ul_id, dl_id, mib_id, preamble_id, rar_id, sr_id,
-             nr_ul_id, nr_dl_id, nr_mib_id, nr_rar_id);
+             nr_ul_id, nr_dl_id, nr_dl_retx_id, nr_mib_id, nr_rar_id);
 
   register_handler_function(h, ul_id, ul, &d);
   register_handler_function(h, dl_id, dl, &d);
@@ -730,6 +782,7 @@ int main(int n, char **v)
 
   register_handler_function(h, nr_ul_id, nr_ul, &d);
   register_handler_function(h, nr_dl_id, nr_dl, &d);
+  register_handler_function(h, nr_dl_retx_id, nr_dl_retx, &d);
   register_handler_function(h, nr_mib_id, nr_mib, &d);
   register_handler_function(h, nr_rar_id, nr_rar, &d);
 
@@ -753,10 +806,11 @@ int main(int n, char **v)
 
     if (e.type == -1) break;
 
-    if (!(e.type == ul_id       || e.type == dl_id    || e.type == mib_id ||
-          e.type == preamble_id || e.type == rar_id   || e.type == sr_id  ||
-          e.type == nr_ul_id    || e.type == nr_dl_id ||
-          e.type == nr_mib_id   || e.type == nr_rar_id)) continue;
+    if (!(e.type == ul_id         || e.type == dl_id     || e.type == mib_id ||
+          e.type == preamble_id   || e.type == rar_id    || e.type == sr_id  ||
+          e.type == nr_ul_id      || e.type == nr_dl_id  ||
+          e.type == nr_dl_retx_id || e.type == nr_mib_id ||
+          e.type == nr_rar_id)) continue;
 
     handle_event(h, e);
   }
diff --git a/docker/Dockerfile.eNB.ubuntu18 b/docker/Dockerfile.eNB.ubuntu18
index cf748784e597a8780ffce23cf51ef647be601d1a..360bf2e7cf17e51a96791b1b41ebf2ee545500fe 100644
--- a/docker/Dockerfile.eNB.ubuntu18
+++ b/docker/Dockerfile.eNB.ubuntu18
@@ -96,6 +96,7 @@ COPY --from=enb-build /usr/lib/x86_64-linux-gnu/libboost_regex.so.1.65.1 .
 COPY --from=enb-build /usr/lib/x86_64-linux-gnu/libboost_serialization.so.1.65.1 .
 COPY --from=enb-build /usr/lib/x86_64-linux-gnu/libboost_thread.so.1.65.1 .
 COPY --from=enb-build /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1 .
+COPY --from=enb-build /usr/lib/x86_64-linux-gnu/libboost_program_options.so.1.65.1 .
 
 RUN ldconfig
 
diff --git a/docker/Dockerfile.gNB.ubuntu18 b/docker/Dockerfile.gNB.ubuntu18
index cfd9d8214491c3f0d4ac4d9cd16a42e775cecc4b..030ce80ede6422f711dcdd13d7b95ec130c662ed 100644
--- a/docker/Dockerfile.gNB.ubuntu18
+++ b/docker/Dockerfile.gNB.ubuntu18
@@ -100,6 +100,7 @@ COPY --from=gnb-build /usr/lib/x86_64-linux-gnu/libboost_regex.so.1.65.1 .
 COPY --from=gnb-build /usr/lib/x86_64-linux-gnu/libboost_serialization.so.1.65.1 .
 COPY --from=gnb-build /usr/lib/x86_64-linux-gnu/libboost_thread.so.1.65.1 .
 COPY --from=gnb-build /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1 .
+COPY --from=gnb-build /usr/lib/x86_64-linux-gnu/libboost_program_options.so.1.65.1 .
 
 RUN ldconfig
 
diff --git a/docker/Dockerfile.lteUE.ubuntu18 b/docker/Dockerfile.lteUE.ubuntu18
index 6d54ab08fc79ca8f38bca7e181df123f336c47f0..31a3ae41ae4787fcaa5e98b05a9a45688703c631 100644
--- a/docker/Dockerfile.lteUE.ubuntu18
+++ b/docker/Dockerfile.lteUE.ubuntu18
@@ -101,6 +101,7 @@ COPY --from=lte-ue-build /usr/lib/x86_64-linux-gnu/libboost_regex.so.1.65.1 .
 COPY --from=lte-ue-build /usr/lib/x86_64-linux-gnu/libboost_serialization.so.1.65.1 .
 COPY --from=lte-ue-build /usr/lib/x86_64-linux-gnu/libboost_thread.so.1.65.1 .
 COPY --from=lte-ue-build /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1 .
+COPY --from=lte-ue-build /usr/lib/x86_64-linux-gnu/libboost_program_options.so.1.65.1 .
 
 RUN ldconfig
 
diff --git a/docker/Dockerfile.nrUE.ubuntu18 b/docker/Dockerfile.nrUE.ubuntu18
index 49e3f3213ba779cd78e0e0c55eecba3acb80ae3e..38d437940ca5322e54e70925b8d8b2106c2c5ee9 100644
--- a/docker/Dockerfile.nrUE.ubuntu18
+++ b/docker/Dockerfile.nrUE.ubuntu18
@@ -102,6 +102,7 @@ COPY --from=nr-ue-build /usr/lib/x86_64-linux-gnu/libboost_regex.so.1.65.1 .
 COPY --from=nr-ue-build /usr/lib/x86_64-linux-gnu/libboost_serialization.so.1.65.1 .
 COPY --from=nr-ue-build /usr/lib/x86_64-linux-gnu/libboost_thread.so.1.65.1 .
 COPY --from=nr-ue-build /usr/lib/x86_64-linux-gnu/libboost_system.so.1.65.1 .
+COPY --from=nr-ue-build /usr/lib/x86_64-linux-gnu/libboost_program_options.so.1.65.1 .
 
 RUN ldconfig
 
diff --git a/executables/nr-ue.c b/executables/nr-ue.c
index 77fc7d171b72bb412515154ad5ba998d4cbdeb42..60ff40abeb16f8eaaf5a49c1d55085a31291c6cf 100644
--- a/executables/nr-ue.c
+++ b/executables/nr-ue.c
@@ -778,12 +778,6 @@ void init_NR_UE_threads(int nb_inst) {
 
     LOG_I(PHY,"Intializing UE Threads for instance %d (%p,%p)...\n",inst,PHY_vars_UE_g[inst],PHY_vars_UE_g[inst][0]);
     threadCreate(&threads[inst], UE_thread, (void *)UE, "UEthread", -1, OAI_PRIORITY_RT_MAX);
-
-     if(get_nrUE_params()->nr_dlsch_parallel)
-     {
-       pthread_t dlsch0_threads;
-       threadCreate(&dlsch0_threads, dlsch_thread, (void *)UE, "DLthread", -1, OAI_PRIORITY_RT_MAX-1);
-     }
   }
 }
 
diff --git a/openair1/PHY/NR_TRANSPORT/pucch_rx.c b/openair1/PHY/NR_TRANSPORT/pucch_rx.c
index 2491c24ab679d89613c724fab493b0f9721e7e6f..b81e655817b35d0f8b017e7f1e24e8525c40a314 100644
--- a/openair1/PHY/NR_TRANSPORT/pucch_rx.c
+++ b/openair1/PHY/NR_TRANSPORT/pucch_rx.c
@@ -429,8 +429,8 @@ void nr_decode_pucch0(PHY_VARS_gNB *gNB,
     uci_pdu->harq->num_harq = 1;
     uci_pdu->harq->harq_confidence_level = no_conf ? 1 : 0;
     uci_pdu->harq->harq_list = (nfapi_nr_harq_t*)malloc(1);
-    uci_pdu->harq->harq_list[0].harq_value = index&0x01;
-    LOG_D(PHY, "[DLSCH/PDSCH/PUCCH] %d.%d HARQ value %d with confidence level (0 is good, 1 is bad) %d xrt_mag %d xrt_mag_next %d n0 %d (%d,%d) pucch0_thres %d, cqi %d, SNRtimes10 %d, energy %f, sync_pos %d\n",
+    uci_pdu->harq->harq_list[0].harq_value = !(index&0x01);
+    LOG_D(PHY, "[DLSCH/PDSCH/PUCCH] %d.%d HARQ value %d (0 pass, 1 fail) with confidence level %d (0 is good, 1 is bad) xrt_mag %d xrt_mag_next %d n0 %d (%d,%d) pucch0_thres %d, cqi %d, SNRtimes10 %d, energy %f, sync_pos %d\n",
           frame,slot,uci_pdu->harq->harq_list[0].harq_value,uci_pdu->harq->harq_confidence_level,xrtmag_dBtimes10,xrtmag_next_dBtimes10,max_n0,uci_stats->pucch0_n00,uci_stats->pucch0_n01,uci_stats->pucch0_thres,cqi,SNRtimes10,10*log10((double)sigenergy),gNB->ulsch_stats[0].sync_pos);
     if (pucch_pdu->sr_flag == 1) {
       uci_pdu->sr = calloc(1,sizeof(*uci_pdu->sr));
@@ -445,9 +445,9 @@ void nr_decode_pucch0(PHY_VARS_gNB *gNB,
     uci_pdu->harq->num_harq = 2;
     uci_pdu->harq->harq_confidence_level = (no_conf) ? 1 : 0;
     uci_pdu->harq->harq_list = (nfapi_nr_harq_t*)malloc(2);
-    uci_pdu->harq->harq_list[1].harq_value = index&0x01;
-    uci_pdu->harq->harq_list[0].harq_value = (index>>1)&0x01;
-    LOG_D(PHY, "[DLSCH/PDSCH/PUCCH] %d.%d HARQ values %d and %d with confidence level (0 is good, 1 is bad) %d, xrt_mag %d xrt_mag_next %d n0 %d (%d,%d) pucch0_thres %d, cqi %d, SNRtimes10 %d,sync_pos %d\n",
+    uci_pdu->harq->harq_list[1].harq_value = !(index&0x01);
+    uci_pdu->harq->harq_list[0].harq_value = !((index>>1)&0x01);
+    LOG_D(PHY, "[DLSCH/PDSCH/PUCCH] %d.%d HARQ values %d (0 pass, 1 fail) and %d with confidence level %d (0 is good, 1 is bad), xrt_mag %d xrt_mag_next %d n0 %d (%d,%d) pucch0_thres %d, cqi %d, SNRtimes10 %d,sync_pos %d\n",
           frame,slot,uci_pdu->harq->harq_list[1].harq_value,uci_pdu->harq->harq_list[0].harq_value,uci_pdu->harq->harq_confidence_level,xrtmag_dBtimes10,xrtmag_next_dBtimes10,max_n0,uci_stats->pucch0_n00,uci_stats->pucch0_n01,uci_stats->pucch0_thres,cqi,SNRtimes10,gNB->ulsch_stats[0].sync_pos);
     if (pucch_pdu->sr_flag == 1) {
       uci_pdu->sr = calloc(1,sizeof(*uci_pdu->sr));
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
index 975d7077a0bcb48c4f47125768b75866b5b60864..b8cd59eeadf5af4f4dc5dcf07b1bf4aee9edef84 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
@@ -47,6 +47,7 @@
 
 //#define ENABLE_PHY_PAYLOAD_DEBUG 1
 
+#define OAI_UL_LDPC_MAX_NUM_LLR 27000// at least 26112 = NR_LDPC_NCOL_BG1*NR_LDPC_ZMAX = 68*384; sizes llrProcBuf in nr_processDLSegment. NOTE(review): named "UL" but used for DLSCH decoding
 //#define OAI_LDPC_MAX_NUM_LLR 27000//26112 // NR_LDPC_NCOL_BG1*NR_LDPC_ZMAX
 
 static uint64_t nb_total_decod =0;
@@ -58,23 +59,26 @@ int nbDlProcessing =0;
 
 
 static  tpool_t pool_dl;
-
 //extern double cpuf;
-void init_dlsch_tpool(uint8_t num_dlsch_threads) {
-  if( num_dlsch_threads==0)
-    return;
 
-  char *params=calloc(1,(num_dlsch_threads*3)+1);
+void init_dlsch_tpool(uint8_t num_dlsch_threads) { // builds the parameter string for the "dlsch" pool and initializes pool_dl
+  char *params = NULL; // string handed to initNamedTpool(), freed before returning
 
-  for (int i=0; i<num_dlsch_threads; i++) {
-    memcpy(params+(i*3),"-1,",3);
+  if( num_dlsch_threads==0) { // no threads requested: the pool is still initialized, with "N"
+    params = calloc(1,2); // "N" plus terminating NUL; NOTE(review): calloc result is not checked
+    memcpy(params,"N",1); // presumably "N" selects a pool without worker threads -- confirm against initNamedTpool()
+  }
+  else {
+    params = calloc(1,(num_dlsch_threads*3)+1); // 3 chars ("-1,") per thread plus terminating NUL; result not checked
+    for (int i=0; i<num_dlsch_threads; i++) {
+      memcpy(params+(i*3),"-1,",3); // one "-1," entry per requested thread
+    }
   }
 
   initNamedTpool(params, &pool_dl, false,"dlsch");
   free(params);
 }
 
-
 void free_nr_ue_dlsch(NR_UE_DLSCH_t **dlschptr,uint8_t N_RB_DL) {
   int i,r;
   uint16_t a_segments = MAX_NUM_NR_DLSCH_SEGMENTS;  //number of segments to be allocated
@@ -223,222 +227,150 @@ void nr_dlsch_unscrambling(int16_t *llr,
   }
 }
 
-uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
-                           UE_nr_rxtx_proc_t *proc,
-                           int eNB_id,
-                           short *dlsch_llr,
-                           NR_DL_FRAME_PARMS *frame_parms,
-                           NR_UE_DLSCH_t *dlsch,
-                           NR_DL_UE_HARQ_t *harq_process,
-                           uint32_t frame,
-                           uint16_t nb_symb_sch,
-                           uint8_t nr_slot_rx,
-                           uint8_t harq_pid,
-                           uint8_t is_crnti,
-                           uint8_t llr8_flag) {
-  time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
-  time_stats_t *dlsch_turbo_decoding_stats=&phy_vars_ue->dlsch_turbo_decoding_stats;
-  time_stats_t *dlsch_deinterleaving_stats=&phy_vars_ue->dlsch_deinterleaving_stats;
-
-  uint32_t A,E;
-  uint32_t G;
-  uint32_t ret,offset;
-  int32_t no_iteration_ldpc, length_dec;
-  uint32_t r,r_offset=0,Kr=8424,Kr_bytes,K_bits_F,err_flag=0;
-  uint8_t crc_type;
-  int8_t llrProcBuf[NR_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
-  t_nrLDPC_dec_params decParams;
-  t_nrLDPC_dec_params *p_decParams = &decParams;
-  t_nrLDPC_time_stats procTime = {0};
-  t_nrLDPC_time_stats *p_procTime =&procTime ;
+bool nr_ue_postDecode(PHY_VARS_NR_UE *phy_vars_ue, notifiedFIFO_elt_t *req, bool last, notifiedFIFO_t *nf_p) {
+  ldpcDecode_ue_t *rdata = (ldpcDecode_ue_t*) NotifiedFifoData(req);
+  NR_DL_UE_HARQ_t *harq_process = rdata->harq_process;
+  NR_UE_DLSCH_t *dlsch = (NR_UE_DLSCH_t *) rdata->dlsch;
+  int r = rdata->segment_r;
 
-  if (!harq_process) {
-    LOG_E(PHY,"dlsch_decoding.c: NULL harq_process pointer\n");
-    return(dlsch->max_ldpc_iterations + 1);
-  }
+  bool decodeSuccess = (rdata->decodeIterations < (1+dlsch->max_ldpc_iterations));
 
-  t_nrLDPC_procBuf **p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf;
-  // HARQ stats
-  phy_vars_ue->dl_stats[harq_process->round]++;
-  int16_t z [68*384];
-  int8_t l [68*384];
-  //__m128i l;
-  //int16_t inv_d [68*384];
-  uint8_t kc;
-  uint8_t Ilbrm = 1;
-  uint32_t Tbslbrm;// = 950984;
-  uint16_t nb_rb;// = 30;
-  double Coderate;// = 0.0;
-  uint8_t dmrs_Type = harq_process->dmrsConfigType;
-  AssertFatal(dmrs_Type == 0 || dmrs_Type == 1, "Illegal dmrs_type %d\n", dmrs_Type);
-  uint8_t nb_re_dmrs;
+  if (decodeSuccess) {
+    memcpy(harq_process->b+rdata->offset,
+           harq_process->c[r],
+           rdata->Kr_bytes - (harq_process->F>>3) -((harq_process->C>1)?3:0));
 
-  if (dmrs_Type==NFAPI_NR_DMRS_TYPE1) {
-    nb_re_dmrs = 6*harq_process->n_dmrs_cdm_groups;
   } else {
-    nb_re_dmrs = 4*harq_process->n_dmrs_cdm_groups;
-  }
-
-  uint16_t dmrs_length = get_num_dmrs(harq_process->dlDmrsSymbPos);
-  uint32_t i,j;
-  __m128i *pv = (__m128i *)&z;
-  __m128i *pl = (__m128i *)&l;
-  vcd_signal_dumper_dump_function_by_name(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_SEGMENTATION, VCD_FUNCTION_IN);
-
-  //NR_DL_UE_HARQ_t *harq_process = dlsch->harq_processes[0];
-
-  if (!dlsch_llr) {
-    LOG_E(PHY,"dlsch_decoding.c: NULL dlsch_llr pointer\n");
-    return(dlsch->max_ldpc_iterations + 1);
-  }
-
-  if (!frame_parms) {
-    LOG_E(PHY,"dlsch_decoding.c: NULL frame_parms pointer\n");
-    return(dlsch->max_ldpc_iterations + 1);
-  }
-
-  /*if (nr_slot_rx> (frame_parms->slots_per_frame-1)) {
-    printf("dlsch_decoding.c: Illegal slot index %d\n",nr_slot_rx);
-    return(dlsch->max_ldpc_iterations + 1);
-  }*/
-  /*if (harq_process->harq_ack.ack != 2) {
-    LOG_D(PHY, "[UE %d] DLSCH @ SF%d : ACK bit is %d instead of DTX even before PDSCH is decoded!\n",
-        phy_vars_ue->Mod_id, nr_slot_rx, harq_process->harq_ack.ack);
-  }*/
-  //  nb_rb = dlsch->nb_rb;
-  /*
-  if (nb_rb > frame_parms->N_RB_DL) {
-    printf("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb);
-    return(max_ldpc_iterations + 1);
-    }*/
-  /*harq_pid = dlsch->current_harq_pid[proc->thread_id];
-  if (harq_pid >= 8) {
-    printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid);
-    return(max_ldpc_iterations + 1);
+    if ( !last ) {
+      int nb=abortTpool(&(pool_dl), req->key);
+      nb+=abortNotifiedFIFO(nf_p, req->key);
+      LOG_D(PHY,"downlink segment error %d/%d, aborted %d segments\n",rdata->segment_r,rdata->nbSegments, nb);
+      LOG_D(PHY, "DLSCH %d in error\n",rdata->dlsch_id);
+      last = true;
+    }
   }
-  */
-  nb_rb = harq_process->nb_rb;
-  harq_process->trials[harq_process->round]++;
-  uint16_t nb_rb_oh = 0; // it was not computed at UE side even before and set to 0 in nr_compute_tbs
-  harq_process->TBS = nr_compute_tbs(harq_process->Qm,harq_process->R,nb_rb,nb_symb_sch,nb_re_dmrs*dmrs_length, nb_rb_oh, 0, harq_process->Nl);
-  A = harq_process->TBS;
-  ret = dlsch->max_ldpc_iterations + 1;
-  dlsch->last_iteration_cnt = ret;
-  harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, dmrs_length, harq_process->Qm,harq_process->Nl);
-  G = harq_process->G;
 
-  LOG_D(PHY,"%d.%d DLSCH Decoding, harq_pid %d TBS %d (%d) G %d nb_re_dmrs %d length dmrs %d mcs %d Nl %d nb_symb_sch %d nb_rb %d\n",
-        frame,nr_slot_rx,harq_pid,A,A/8,G, nb_re_dmrs, dmrs_length, harq_process->mcs, harq_process->Nl, nb_symb_sch,nb_rb);
-
-  if ((harq_process->R)<1024)
-    Coderate = (float) (harq_process->R) /(float) 1024;
-  else
-    Coderate = (float) (harq_process->R) /(float) 2048;
+  // if all segments are done, or a failed segment caused the remaining ones to be aborted
+  if (last) {
+    if (decodeSuccess) {
+      //LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for nr_slot_rx %d TBS %d mcs %d nb_rb %d harq_process->round %d\n",
+      //      phy_vars_ue->Mod_id,nr_slot_rx,harq_process->TBS,harq_process->mcs,harq_process->nb_rb, harq_process->round);
+      harq_process->status = SCH_IDLE;
+      harq_process->round  = 0;
+      harq_process->ack = 1;
 
-  if ((A <=292) || ((A <= NR_MAX_PDSCH_TBS) && (Coderate <= 0.6667)) || Coderate <= 0.25) {
-    p_decParams->BG = 2;
-    kc = 52;
+      //LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d)\n",
+      //  phy_vars_ue->Mod_id, frame, subframe, harq_pid, harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs);
 
-    if (Coderate < 0.3333) {
-      p_decParams->R = 15;
-    } else if (Coderate <0.6667) {
-      p_decParams->R = 13;
+      //if(is_crnti) {
+      //  LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for nr_slot_rx %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,nr_slot_rx,harq_pid,harq_process->round,harq_process->TBS);
+      //}
+      dlsch->last_iteration_cnt = rdata->decodeIterations;
+      LOG_D(PHY, "DLSCH received ok \n");
     } else {
-      p_decParams->R = 23;
-    }
-  } else {
-    p_decParams->BG = 1;
-    kc = 68;
+      //LOG_D(PHY,"[UE %d] DLSCH: Setting NAK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d) Kr %d r %d harq_process->round %d\n",
+      //      phy_vars_ue->Mod_id, frame, nr_slot_rx, harq_pid,harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs,Kr,r,harq_process->round);
+      harq_process->ack = 0;
+      harq_process->round++;
+      if (harq_process->round >= dlsch->Mlimit) {
+        harq_process->status = SCH_IDLE;
+        harq_process->round  = 0;
+        phy_vars_ue->dl_stats[4]++;
+      }
 
-    if (Coderate < 0.6667) {
-      p_decParams->R = 13;
-    } else if (Coderate <0.8889) {
-      p_decParams->R = 23;
-    } else {
-      p_decParams->R = 89;
+      //if(is_crnti) {
+      //  LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for nr_slot_rx %d (pid %d, pid status %d, round %d/Max %d, TBS %d)\n",
+      //        phy_vars_ue->Mod_id,nr_slot_rx,harq_pid,harq_process->status,harq_process->round,dlsch->Mdlharq,harq_process->TBS);
+      //}
+      dlsch->last_iteration_cnt = dlsch->max_ldpc_iterations + 1;
+      LOG_D(PHY, "DLSCH received nok \n");
     }
+    return true; //stop
   }
-
-  if (harq_process->first_rx == 1) {
-    // This is a new packet, so compute quantities regarding segmentation
-    if (A > NR_MAX_PDSCH_TBS)
-      harq_process->B = A+24;
-    else
-      harq_process->B = A+16;
-
-    nr_segmentation(NULL,
-                    NULL,
-                    harq_process->B,
-                    &harq_process->C,
-                    &harq_process->K,
-                    &harq_process->Z, // [hna] Z is Zc
-                    &harq_process->F,
-                    p_decParams->BG);
-
-    if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD) && (!frame%100))
-      LOG_I(PHY,"K %d C %d Z %d nl %d \n", harq_process->K, harq_process->C, p_decParams->Z, harq_process->Nl);
-  }
-
-  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_SEGMENTATION, VCD_FUNCTION_OUT);
-  p_decParams->Z = harq_process->Z;
-  //printf("dlsch decoding nr segmentation Z %d\n", p_decParams->Z);
-  //printf("coderate %f kc %d \n", Coderate, kc);
-  p_decParams->numMaxIter = dlsch->max_ldpc_iterations;
-  p_decParams->outMode= 0;
-  err_flag = 0;
-  r_offset = 0;
-  uint16_t a_segments = MAX_NUM_NR_DLSCH_SEGMENTS;  //number of segments to be allocated
-
-  if (nb_rb != 273) {
-    a_segments = a_segments*nb_rb;
-    a_segments = a_segments/273 +1;
-  }
-
-  if (harq_process->C > a_segments) {
-    LOG_E(PHY,"Illegal harq_process->C %d > %d\n",harq_process->C,a_segments);
-    return((1+dlsch->max_ldpc_iterations));
+  else
+  {
+	return false; //not last one
   }
+}
 
-  if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-    LOG_I(PHY,"Segmentation: C %d, K %d\n",harq_process->C,harq_process->K);
+void nr_processDLSegment(void* arg) {
+  ldpcDecode_ue_t *rdata = (ldpcDecode_ue_t*) arg;
+  NR_UE_DLSCH_t *dlsch = rdata->dlsch;
+#if UE_TIMING_TRACE //TBD
+  PHY_VARS_NR_UE *phy_vars_ue = rdata->phy_vars_ue;
+  time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
+  time_stats_t *dlsch_turbo_decoding_stats=&phy_vars_ue->dlsch_turbo_decoding_stats;
+  time_stats_t *dlsch_deinterleaving_stats=&phy_vars_ue->dlsch_deinterleaving_stats;
+#endif
+  NR_DL_UE_HARQ_t *harq_process= rdata->harq_process;
+  t_nrLDPC_dec_params *p_decoderParms = &rdata->decoderParms;
+  int length_dec;
+  int no_iteration_ldpc;
+  int Kr;
+  int Kr_bytes;
+  int K_bits_F;
+  uint8_t crc_type;
+  int i;
+  int j;
+  int r = rdata->segment_r;
+  int A = rdata->A;
+  int E = rdata->E;
+  int Qm = rdata->Qm;
+  //int rv_index = rdata->rv_index;
+  int r_offset = rdata->r_offset;
+  uint8_t kc = rdata->Kc;
+  uint32_t Tbslbrm = rdata->Tbslbrm;
+  short* dlsch_llr = rdata->dlsch_llr;
+  rdata->decodeIterations = dlsch->max_ldpc_iterations + 1;
+  int8_t llrProcBuf[OAI_UL_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
+
+  int16_t  z [68*384 + 16] __attribute__ ((aligned(16)));
+  int8_t   l [68*384 + 16] __attribute__ ((aligned(16)));
+
+  __m128i *pv = (__m128i*)&z;
+  __m128i *pl = (__m128i*)&l;
+
+  uint8_t  Ilbrm    = 0;
 
-  opp_enabled=1;
   Kr = harq_process->K; // [hna] overwrites this line "Kr = p_decParams->Z*kb"
   Kr_bytes = Kr>>3;
   K_bits_F = Kr-harq_process->F;
 
-  for (r=0; r<harq_process->C; r++) {
-    //printf("start rx segment %d\n",r);
-    E = nr_get_E(G, harq_process->C, harq_process->Qm, harq_process->Nl, r);
+  t_nrLDPC_time_stats procTime = {0};
+  t_nrLDPC_time_stats* p_procTime     = &procTime ;
+
+  t_nrLDPC_procBuf **p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf;
+
+#if UE_TIMING_TRACE
     start_meas(dlsch_deinterleaving_stats);
-    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_DEINTERLEAVING, VCD_FUNCTION_IN);
+#endif
+    //VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_DEINTERLEAVING, VCD_FUNCTION_IN);
     nr_deinterleaving_ldpc(E,
-                           harq_process->Qm,
+                           Qm,
                            harq_process->w[r], // [hna] w is e
                            dlsch_llr+r_offset);
-    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_DEINTERLEAVING, VCD_FUNCTION_OUT);
+    //VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_DEINTERLEAVING, VCD_FUNCTION_OUT);
+#if UE_TIMING_TRACE
     stop_meas(dlsch_deinterleaving_stats);
+#endif
+#if UE_TIMING_TRACE
     start_meas(dlsch_rate_unmatching_stats);
-    LOG_D(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,E %d, F %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n",
+#endif
+    /* LOG_D(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,E %d, F %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n",
           harq_pid,r, G,E,harq_process->F,
           Kr*3,
           harq_process->TBS,
-          harq_process->Qm,
+          Qm,
           harq_process->nb_rb,
           harq_process->Nl,
           harq_process->rvidx,
-          harq_process->round);
-    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_RATE_MATCHING, VCD_FUNCTION_IN);
-
-    if ((harq_process->Nl)<4)
-      Tbslbrm = nr_compute_tbslbrm(harq_process->mcs_table,nb_rb,harq_process->Nl);
-    else
-      Tbslbrm = nr_compute_tbslbrm(harq_process->mcs_table,nb_rb,4);
+          harq_process->round); */
+    //VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_RATE_MATCHING, VCD_FUNCTION_IN);
 
     if (nr_rate_matching_ldpc_rx(Ilbrm,
                                  Tbslbrm,
-                                 p_decParams->BG,
-                                 p_decParams->Z,
+                                 p_decoderParms->BG,
+                                 p_decoderParms->Z,
                                  harq_process->d[r],
                                  harq_process->w[r],
                                  harq_process->C,
@@ -446,13 +378,18 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
                                  (harq_process->first_rx==1)?1:0,
                                  E,
                                  harq_process->F,
-                                 Kr-harq_process->F-2*(p_decParams->Z))==-1) {
-    VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_RATE_MATCHING, VCD_FUNCTION_OUT);
-    stop_meas(dlsch_rate_unmatching_stats);
-    LOG_E(PHY,"dlsch_decoding.c: Problem in rate_matching\n");
-    return(dlsch->max_ldpc_iterations + 1);
+                                 Kr-harq_process->F-2*(p_decoderParms->Z))==-1) {
+      //VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_RATE_MATCHING, VCD_FUNCTION_OUT);
+#if UE_TIMING_TRACE
+      stop_meas(dlsch_rate_unmatching_stats);
+#endif
+      LOG_E(PHY,"dlsch_decoding.c: Problem in rate_matching\n");
+      rdata->decodeIterations = dlsch->max_ldpc_iterations + 1;
+	  return;
     } else {
-    stop_meas(dlsch_rate_unmatching_stats);
+#if UE_TIMING_TRACE
+      stop_meas(dlsch_rate_unmatching_stats);
+#endif
     }
 
     r_offset += E;
@@ -480,8 +417,10 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
       length_dec = (harq_process->B+24*harq_process->C)/harq_process->C;
     }
 
-    if (err_flag == 0) {
+    {
+#if UE_TIMING_TRACE
       start_meas(dlsch_turbo_decoding_stats);
+#endif
       //set first 2*Z_c bits to zeros
       memset(&z[0],0,2*harq_process->Z*sizeof(int16_t));
       //set Filler bits
@@ -496,15 +435,15 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
         pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
       }
 
-      VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_LDPC, VCD_FUNCTION_IN);
-      p_decParams->block_length=length_dec;
-      nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
-      no_iteration_ldpc = nrLDPC_decoder(p_decParams,
+      //VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_LDPC, VCD_FUNCTION_IN);
+      p_decoderParms->block_length=length_dec;
+      nrLDPC_initcall(p_decoderParms, (int8_t*)&pl[0], llrProcBuf);
+      no_iteration_ldpc = nrLDPC_decoder(p_decoderParms,
                                          (int8_t *)&pl[0],
                                          llrProcBuf,
                                          p_nrLDPC_procBuf[r],
                                          p_procTime);
-      VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_LDPC, VCD_FUNCTION_OUT);
+      //VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_LDPC, VCD_FUNCTION_OUT);
 
       // Fixme: correct type is unsigned, but nrLDPC_decoder and all called behind use signed int
       if (check_crc((uint8_t *)llrProcBuf,length_dec,harq_process->F,crc_type)) {
@@ -516,7 +455,7 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
 
         //Temporary hack
         no_iteration_ldpc = dlsch->max_ldpc_iterations;
-        ret = no_iteration_ldpc;
+        rdata->decodeIterations = no_iteration_ldpc;
       } else {
         LOG_D(PHY,"CRC NOT OK\n\033[0m");
       }
@@ -531,210 +470,109 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
         harq_process->c[r][m]= (uint8_t) llrProcBuf[m];
       }
 
-      if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD)) {
-        for (int k=0; k<A>>3; k++)
-          LOG_D(PHY,"output decoder [%d] =  0x%02x \n", k, harq_process->c[r][k]);
-        LOG_D(PHY,"no_iterations_ldpc %d (ret %u)\n",no_iteration_ldpc,ret);
-      }
-
+#if UE_TIMING_TRACE
       stop_meas(dlsch_turbo_decoding_stats);
+#endif
     }
-
-    if ((err_flag == 0) && (ret>=(1+dlsch->max_ldpc_iterations))) {// a Code segment is in error so break;
-      LOG_D(PHY,"AbsSubframe %d.%d CRC failed, segment %d/%d \n",frame%1024,nr_slot_rx,r,harq_process->C-1);
-      err_flag = 1;
-    }
-  }
-
-  if (err_flag == 1) {
-    LOG_D(PHY,"[UE %d] DLSCH: Setting NAK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d) Kr %d r %d harq_process->round %d\n",
-          phy_vars_ue->Mod_id, frame, nr_slot_rx, harq_pid,harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs,Kr,r,harq_process->round);
-    harq_process->ack = 0;
-    harq_process->errors[harq_process->round]++;
-
-    if (harq_process->round >= dlsch->Mlimit) {
-      harq_process->status = SCH_IDLE;
-      harq_process->round  = 0;
-      phy_vars_ue->dl_stats[4]++;
-    }
-
-    if(is_crnti) {
-      LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for nr_slot_rx %d (pid %d, pid status %d, round %d/Max %d, TBS %d)\n",
-            phy_vars_ue->Mod_id,nr_slot_rx,harq_pid,harq_process->status,harq_process->round,dlsch->Mdlharq,harq_process->TBS);
-    }
-
-    return((1 + dlsch->max_ldpc_iterations));
-  } else {
-    LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for nr_slot_rx %d TBS %d mcs %d nb_rb %d harq_process->round %d\n",
-          phy_vars_ue->Mod_id,nr_slot_rx,harq_process->TBS,harq_process->mcs,harq_process->nb_rb, harq_process->round);
-    harq_process->status = SCH_IDLE;
-    harq_process->round  = 0;
-    harq_process->ack = 1;
-
-    //LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d)\n",
-    //  phy_vars_ue->Mod_id, frame, subframe, harq_pid, harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs);
-
-    if(is_crnti) {
-      LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for nr_slot_rx %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,nr_slot_rx,harq_pid,harq_process->round,harq_process->TBS);
-    }
-
-    //LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round);
-  }
-
-  // Reassembly of Transport block here
-  offset = 0;
-  Kr = harq_process->K;
-  Kr_bytes = Kr>>3;
-  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_COMBINE_SEG, VCD_FUNCTION_IN);
-
-  for (r=0; r<harq_process->C; r++) {
-    memcpy(harq_process->b+offset,
-           harq_process->c[r],
-           Kr_bytes - (harq_process->F>>3) - ((harq_process->C>1)?3:0));
-    offset += (Kr_bytes - (harq_process->F>>3) - ((harq_process->C>1)?3:0));
-
-    if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD)) {
-      LOG_D(PHY,"Segment %u : Kr= %u bytes\n",r,Kr_bytes);
-      LOG_D(PHY,"copied %d bytes to b sequence (harq_pid %d)\n",
-            (Kr_bytes - (harq_process->F>>3)-((harq_process->C>1)?3:0)),harq_pid);
-      LOG_D(PHY,"b[0] = %p,c[%d] = %p\n",
-            (void *)(uint64_t)(harq_process->b[offset]),
-            harq_process->F>>3,
-            (void *)(uint64_t)(harq_process->c[r]) );
-
-      if (frame%100 == 0) {
-        LOG_D (PHY, "Printing 60 first payload bytes at frame: %d ", frame);
-
-        for (int i = 0; i <60 ; i++) { //Kr_bytes
-          LOG_D(PHY, "[%d] : %x ", i, harq_process->b[i]);
-        }
-      }
-    }
-  }
-
-  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_COMBINE_SEG, VCD_FUNCTION_OUT);
-  dlsch->last_iteration_cnt = ret;
-  return(ret);
 }
 
-
-uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
-                                    UE_nr_rxtx_proc_t *proc,
-                                    int eNB_id,
-                                    short *dlsch_llr,
-                                    NR_DL_FRAME_PARMS *frame_parms,
-                                    NR_UE_DLSCH_t *dlsch,
-                                    NR_DL_UE_HARQ_t *harq_process,
-                                    uint32_t frame,
-                                    uint16_t nb_symb_sch,
-                                    uint8_t nr_slot_rx,
-                                    uint8_t harq_pid,
-                                    uint8_t is_crnti,
-                                    uint8_t llr8_flag) {
-
-  time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
-  time_stats_t *dlsch_turbo_decoding_stats=&phy_vars_ue->dlsch_turbo_decoding_stats;
-  time_stats_t *dlsch_deinterleaving_stats=&phy_vars_ue->dlsch_deinterleaving_stats;
+uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
+                           UE_nr_rxtx_proc_t *proc,
+                           int eNB_id,
+                           short *dlsch_llr,
+                           NR_DL_FRAME_PARMS *frame_parms,
+                           NR_UE_DLSCH_t *dlsch,
+                           NR_DL_UE_HARQ_t *harq_process,
+                           uint32_t frame,
+                           uint16_t nb_symb_sch,
+                           uint8_t nr_slot_rx,
+                           uint8_t harq_pid,
+                           uint8_t is_crnti,
+                           uint8_t llr8_flag) {
   uint32_t A,E;
   uint32_t G;
   uint32_t ret,offset;
-  uint32_t r,r_offset=0,Kr=8424,Kr_bytes,err_flag=0,K_bits_F;
-  uint8_t crc_type;
-  //UE_rxtx_proc_t *proc = &phy_vars_ue->proc;
-  int32_t no_iteration_ldpc,length_dec;
-  /*uint8_t C;
-  uint8_t Qm;
-  uint8_t r_thread;
-  uint32_t Er, Gp,GpmodC;*/
+  uint32_t r,r_offset=0,Kr=8424,Kr_bytes;
   t_nrLDPC_dec_params decParams;
   t_nrLDPC_dec_params *p_decParams = &decParams;
-  t_nrLDPC_time_stats procTime;
-  t_nrLDPC_time_stats *p_procTime =&procTime ;
-  int8_t llrProcBuf[NR_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
 
   if (!harq_process) {
     LOG_E(PHY,"dlsch_decoding.c: NULL harq_process pointer\n");
-    return(dlsch->max_ldpc_iterations);
+    return(dlsch->max_ldpc_iterations + 1);
   }
 
-  t_nrLDPC_procBuf *p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[0];
-  uint8_t Nl=4;
-  int16_t z [68*384];
-  int8_t l [68*384];
+  // HARQ stats
+  phy_vars_ue->dl_stats[harq_process->round]++;
   uint8_t kc;
-  uint8_t Ilbrm = 1;
-  uint32_t Tbslbrm = 950984;
-  uint16_t nb_rb = 30;
-  double Coderate = 0.0;
-  uint8_t dmrs_type = harq_process->dmrsConfigType;
+  uint32_t Tbslbrm;// = 950984;
+  uint16_t nb_rb;// = 30;
+  double Coderate;// = 0.0;
+  uint8_t dmrs_Type = harq_process->dmrsConfigType;
+  AssertFatal(dmrs_Type == 0 || dmrs_Type == 1, "Illegal dmrs_type %d\n", dmrs_Type);
   uint8_t nb_re_dmrs;
 
-  if (dmrs_type == NFAPI_NR_DMRS_TYPE1)
+  if (dmrs_Type==NFAPI_NR_DMRS_TYPE1) {
     nb_re_dmrs = 6*harq_process->n_dmrs_cdm_groups;
-  else
+  } else {
     nb_re_dmrs = 4*harq_process->n_dmrs_cdm_groups;
+  }
 
-  uint16_t length_dmrs = get_num_dmrs(harq_process->dlDmrsSymbPos);
-  uint32_t i,j;
-  __m128i *pv = (__m128i *)&z;
-  __m128i *pl = (__m128i *)&l;
-  notifiedFIFO_t nf;
-  initNotifiedFIFO(&nf);
-  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_SEGMENTATION, VCD_FUNCTION_IN);
+  uint16_t dmrs_length = get_num_dmrs(harq_process->dlDmrsSymbPos);
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_SEGMENTATION, VCD_FUNCTION_IN); // use the macro so this IN trace is gated the same way as the matching OUT trace
 
+  //NR_DL_UE_HARQ_t *harq_process = dlsch->harq_processes[0];
+  
+  int nbDecode = 0;
+  
   if (!dlsch_llr) {
     LOG_E(PHY,"dlsch_decoding.c: NULL dlsch_llr pointer\n");
-    return(dlsch->max_ldpc_iterations);
+    return(dlsch->max_ldpc_iterations + 1);
   }
 
   if (!frame_parms) {
     LOG_E(PHY,"dlsch_decoding.c: NULL frame_parms pointer\n");
-    return(dlsch->max_ldpc_iterations);
+    return(dlsch->max_ldpc_iterations + 1);
   }
 
-  /* if (nr_slot_rx> (frame_parms->slots_per_frame-1)) {
-     printf("dlsch_decoding.c: Illegal slot index %d\n",nr_slot_rx);
-     return(dlsch->max_ldpc_iterations);
-   }
-
-   if (dlsch->harq_ack[nr_slot_rx].ack != 2) {
-     LOG_D(PHY, "[UE %d] DLSCH @ SF%d : ACK bit is %d instead of DTX even before PDSCH is decoded!\n",
-         phy_vars_ue->Mod_id, nr_slot_rx, dlsch->harq_ack[nr_slot_rx].ack);
-   }*/
+  /*if (nr_slot_rx> (frame_parms->slots_per_frame-1)) {
+    printf("dlsch_decoding.c: Illegal slot index %d\n",nr_slot_rx);
+    return(dlsch->max_ldpc_iterations + 1);
+  }*/
+  /*if (harq_process->harq_ack.ack != 2) {
+    LOG_D(PHY, "[UE %d] DLSCH @ SF%d : ACK bit is %d instead of DTX even before PDSCH is decoded!\n",
+        phy_vars_ue->Mod_id, nr_slot_rx, harq_process->harq_ack.ack);
+  }*/
+  //  nb_rb = dlsch->nb_rb;
   /*
   if (nb_rb > frame_parms->N_RB_DL) {
     printf("dlsch_decoding.c: Illegal nb_rb %d\n",nb_rb);
-    return(max_ldpc_iterations);
+    return(max_ldpc_iterations + 1);
     }*/
   /*harq_pid = dlsch->current_harq_pid[proc->thread_id];
   if (harq_pid >= 8) {
     printf("dlsch_decoding.c: Illegal harq_pid %d\n",harq_pid);
-    return(max_ldpc_iterations);
+    return(max_ldpc_iterations + 1);
   }
   */
   nb_rb = harq_process->nb_rb;
   harq_process->trials[harq_process->round]++;
-  // HARQ stats
-  phy_vars_ue->dl_stats[harq_process->round]++;
   uint16_t nb_rb_oh = 0; // it was not computed at UE side even before and set to 0 in nr_compute_tbs
-  harq_process->TBS = nr_compute_tbs(harq_process->Qm,harq_process->R,nb_rb,nb_symb_sch,nb_re_dmrs*length_dmrs, nb_rb_oh, 0, harq_process->Nl);
+  harq_process->TBS = nr_compute_tbs(harq_process->Qm,harq_process->R,nb_rb,nb_symb_sch,nb_re_dmrs*dmrs_length, nb_rb_oh, 0, harq_process->Nl);
   A = harq_process->TBS;
   ret = dlsch->max_ldpc_iterations + 1;
   dlsch->last_iteration_cnt = ret;
-  harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, harq_process->Qm,harq_process->Nl);
+  harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, dmrs_length, harq_process->Qm,harq_process->Nl);
   G = harq_process->G;
-  LOG_D(PHY,"DLSCH Decoding main, harq_pid %d TBS %d G %d, nb_re_dmrs %d, length_dmrs %d  mcs %d Nl %d nb_symb_sch %d nb_rb %d\n",harq_pid,A,G, nb_re_dmrs, length_dmrs, harq_process->mcs,
-        harq_process->Nl, nb_symb_sch,nb_rb);
-  proc->decoder_main_available = 1;
-  proc->decoder_thread_available = 0;
-  proc->decoder_thread_available1 = 0;
+
+  LOG_D(PHY,"%d.%d DLSCH Decoding, harq_pid %d TBS %d (%d) G %d nb_re_dmrs %d length dmrs %d mcs %d Nl %d nb_symb_sch %d nb_rb %d\n",
+        frame,nr_slot_rx,harq_pid,A,A/8,G, nb_re_dmrs, dmrs_length, harq_process->mcs, harq_process->Nl, nb_symb_sch,nb_rb);
 
   if ((harq_process->R)<1024)
     Coderate = (float) (harq_process->R) /(float) 1024;
   else
     Coderate = (float) (harq_process->R) /(float) 2048;
 
-  if ((A <= 292) || ((A <= NR_MAX_PDSCH_TBS) && (Coderate <= 0.6667)) || Coderate <= 0.25) {
+  if ((A <=292) || ((A <= NR_MAX_PDSCH_TBS) && (Coderate <= 0.6667)) || Coderate <= 0.25) {
     p_decParams->BG = 2;
     kc = 52;
 
@@ -770,15 +608,24 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
                     harq_process->B,
                     &harq_process->C,
                     &harq_process->K,
-                    &harq_process->Z,
+                    &harq_process->Z, // [hna] Z is Zc
                     &harq_process->F,
                     p_decParams->BG);
+
+    if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD) && (frame%100 == 0)) // once every 100 frames; "!frame%100" parsed as "(!frame)%100"
+      LOG_I(PHY,"K %d C %d Z %d nl %d \n", harq_process->K, harq_process->C, harq_process->Z, harq_process->Nl); // p_decParams->Z is not assigned yet at this point
   }
+  if ((harq_process->Nl)<4)
+    Tbslbrm = nr_compute_tbslbrm(harq_process->mcs_table,nb_rb,harq_process->Nl);
+  else
+    Tbslbrm = nr_compute_tbslbrm(harq_process->mcs_table,nb_rb,4);
 
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_SEGMENTATION, VCD_FUNCTION_OUT);
   p_decParams->Z = harq_process->Z;
+  //printf("dlsch decoding nr segmentation Z %d\n", p_decParams->Z);
+  //printf("coderate %f kc %d \n", Coderate, kc);
   p_decParams->numMaxIter = dlsch->max_ldpc_iterations;
   p_decParams->outMode= 0;
-  err_flag = 0;
   r_offset = 0;
   uint16_t a_segments = MAX_NUM_NR_DLSCH_SEGMENTS;  //number of segments to be allocated
 
@@ -793,628 +640,59 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
   }
 
   if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-    LOG_D(PHY,"Segmentation: C %d, K %d\n",harq_process->C,harq_process->K);
+    LOG_I(PHY,"Segmentation: C %d, K %d\n",harq_process->C,harq_process->K);
 
-  notifiedFIFO_elt_t *res_dl;
   opp_enabled=1;
-  if (harq_process->C>1) {
-    for (int nb_seg =1 ; nb_seg<harq_process->C; nb_seg++) {
-      if ( (res_dl=tryPullTpool(&nf, &pool_dl)) != NULL ) {
-        pushNotifiedFIFO_nothreadSafe(&freeBlocks_dl,res_dl);
-      }
-
-      AssertFatal((msgToPush_dl=pullNotifiedFIFO_nothreadSafe(&freeBlocks_dl)) != NULL,"chained list failure");
-      nr_rxtx_thread_data_t *curMsg=(nr_rxtx_thread_data_t *)NotifiedFifoData(msgToPush_dl);
-      curMsg->UE=phy_vars_ue;
-      nbDlProcessing++;
-      memset(&curMsg->proc, 0, sizeof(curMsg->proc));
-      curMsg->proc.frame_rx   = proc->frame_rx;
-      curMsg->proc.nr_slot_rx = proc->nr_slot_rx;
-      curMsg->proc.thread_id  = proc->thread_id;
-      curMsg->proc.num_seg    = nb_seg;
-      curMsg->proc.eNB_id= eNB_id;
-      curMsg->proc.harq_pid=harq_pid;
-      curMsg->proc.llr8_flag = llr8_flag;
-      msgToPush_dl->key= (nr_slot_rx%2) ? (nb_seg+30): nb_seg;
-      pushTpool(&pool_dl, msgToPush_dl);
-      /*Qm= harq_process->Qm;
-        Nl=harq_process->Nl;
-        r_thread = harq_process->C/2-1;
-        C= harq_process->C;
-
-        Gp = G/Nl/Qm;
-        GpmodC = Gp%C;
-
-
-        if (r_thread < (C-(GpmodC)))
-          Er = Nl*Qm * (Gp/C);
-        else
-          Er = Nl*Qm * ((GpmodC==0?0:1) + (Gp/C));
-        printf("mthread Er %d\n", Er);
-
-        printf("mthread instance_cnt_dlsch_td %d\n",  proc->instance_cnt_dlsch_td);*/
-    }
-
-    //proc->decoder_main_available = 1;
-  }
-
-  r = 0;
-
-  if (r==0) r_offset =0;
-
-  Kr = harq_process->K;
+  Kr = harq_process->K; // [hna] overwrites this line "Kr = p_decParams->Z*kb"
   Kr_bytes = Kr>>3;
-  K_bits_F = Kr-harq_process->F;
-  E = nr_get_E(G, harq_process->C, harq_process->Qm, harq_process->Nl, r);
-  /*
-  printf("Subblock deinterleaving, dlsch_llr %p, w %p\n",
-   dlsch_llr+r_offset,
-   &harq_process->w[r]);
-  */
-  start_meas(dlsch_deinterleaving_stats);
-  nr_deinterleaving_ldpc(E,
-                         harq_process->Qm,
-                         harq_process->w[r],
-                         dlsch_llr+r_offset);
-
-  if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-    for (int i =0; i<16; i++)
-      LOG_D(PHY,"rx output deinterleaving w[%d]= %d r_offset %u\n", i,harq_process->w[r][i], r_offset);
-
-  stop_meas(dlsch_deinterleaving_stats);
-  start_meas(dlsch_rate_unmatching_stats);
-
-  if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-    LOG_I(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n",
-          harq_pid,r, G,
-          Kr*3,
-          harq_process->TBS,
-          harq_process->Qm,
-          harq_process->nb_rb,
-          harq_process->Nl,
-          harq_process->rvidx,
-          harq_process->round);
-
-  // for tbslbrm calculation according to 5.4.2.1 of 38.212
-  if (harq_process->Nl < Nl)
-    Nl = harq_process->Nl;
-
-  Tbslbrm = nr_compute_tbslbrm(harq_process->mcs_table,nb_rb,harq_process->Nl);
-
-  if (nr_rate_matching_ldpc_rx(Ilbrm,
-                               Tbslbrm,
-                               p_decParams->BG,
-                               p_decParams->Z,
-                               harq_process->d[r],
-                               harq_process->w[r],
-                               harq_process->C,
-                               harq_process->rvidx,
-                               (harq_process->first_rx==1)?1:0,
-                               E,
-                               harq_process->F,
-                               Kr-harq_process->F-2*(p_decParams->Z))==-1) {
-    stop_meas(dlsch_rate_unmatching_stats);
-    LOG_E(PHY,"dlsch_decoding.c: Problem in rate_matching\n");
-    return(dlsch->max_ldpc_iterations);
-  } else {
-    stop_meas(dlsch_rate_unmatching_stats);
-  }
-
-  if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-    for (int i =0; i<16; i++)
-      LOG_I(PHY,"rx output ratematching d[%d]= %d r_offset %u\n", i,harq_process->d[r][i], r_offset);
-
-  if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD)) {
-    if (r==0) {
-      LOG_M("decoder_llr.m","decllr",dlsch_llr,G,1,0);
-      LOG_M("decoder_in.m","dec",&harq_process->d[0][96],(3*8*Kr_bytes)+12,1,0);
-    }
-
-    LOG_D(PHY,"decoder input(segment %u) :",r);
-
-    for (int i=0; i<(3*8*Kr_bytes); i++)
-      LOG_D(PHY,"%d : %d\n",i,harq_process->d[r][i]);
-
-    LOG_D(PHY,"\n");
-  }
-
-  memset(harq_process->c[r],0,Kr_bytes);
-
-  if (harq_process->C == 1) {
-    if (A > NR_MAX_PDSCH_TBS)
-      crc_type = CRC24_A;
-    else
-      crc_type = CRC16;
-
-    length_dec = harq_process->B;
-  } else {
-    crc_type = CRC24_B;
-    length_dec = (harq_process->B+24*harq_process->C)/harq_process->C;
-  }
-
-  //#ifndef __AVX2__
-
-  if (err_flag == 0) {
-    /*
-            LOG_D(PHY, "LDPC algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
-                                Kr,r,harq_process->C,harq_process->nb_rb,crc_type,A,harq_process->TBS,
-                                harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_ldpc_iterations);
-    */
-    start_meas(dlsch_turbo_decoding_stats);
-    LOG_D(PHY,"mthread AbsSubframe %d.%d Start LDPC segment %d/%d \n",frame%1024,nr_slot_rx,r,harq_process->C-1);
-    /*for (int cnt =0; cnt < (kc-2)*p_decParams->Z; cnt++){
-      inv_d[cnt] = (1)*harq_process->d[r][cnt];
-    }*/
-    //set first 2*Z_c bits to zeros
-    memset(&z[0],0,2*harq_process->Z*sizeof(int16_t));
-    //set Filler bits
-    memset((&z[0]+K_bits_F),127,harq_process->F*sizeof(int16_t));
-    //Move coded bits before filler bits
-    memcpy((&z[0]+2*harq_process->Z),harq_process->d[r],(K_bits_F-2*harq_process->Z)*sizeof(int16_t));
-    //skip filler bits
-    memcpy((&z[0]+Kr),harq_process->d[r]+(Kr-2*harq_process->Z),(kc*harq_process->Z-Kr)*sizeof(int16_t));
-
-    //Saturate coded bits before decoding into 8 bits values
-    for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1;  i+=2, j++) {
-      pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
-    }
-    p_decParams->block_length=length_dec;
-    nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
-    no_iteration_ldpc = nrLDPC_decoder(p_decParams,
-                                       (int8_t *)&pl[0],
-                                       llrProcBuf,
-                                       p_nrLDPC_procBuf,
-                                       p_procTime);
-    nb_total_decod++;
-
-    if (no_iteration_ldpc > 10) {
-      nb_error_decod++;
-      ret = 1+dlsch->max_ldpc_iterations;
-    } else {
-      ret=2;
-    }
-
-    if (check_crc((uint8_t *)llrProcBuf,length_dec,harq_process->F,crc_type)) {
-      LOG_D(PHY,"Segment %u CRC OK\n",r);
-      ret = 2;
-    } else {
-      ret = 1+dlsch->max_ldpc_iterations;
-    }
-
-    if (!nb_total_decod%10000) {
-      printf("Error number of iteration LPDC %d %ld/%ld \n", no_iteration_ldpc, nb_error_decod,nb_total_decod);
-      fflush(stdout);
-    }
-
-    for (int m=0; m < Kr>>3; m ++) {
-      harq_process->c[r][m]= (uint8_t) llrProcBuf[m];
-    }
-
-    /*for (int u=0; u < Kr>>3; u ++)
-      {
-        ullrProcBuf[u]= (uint8_t) llrProcBuf[u];
-      }
-
-
-      printf("output unsigned ullrProcBuf \n");
-
-      for (int j=0; j < Kr>>3; j ++)
-      {
-        printf(" %d \n", ullrProcBuf[j]);
-      }
-      printf(" \n");*/
-    //printf("output channel decoder %d %d %d %d %d \n", harq_process->c[r][0], harq_process->c[r][1], harq_process->c[r][2],harq_process->c[r][3], harq_process->c[r][4]);
-
-    //printf("output decoder %d %d %d %d %d \n", harq_process->c[r][0], harq_process->c[r][1], harq_process->c[r][2],harq_process->c[r][3], harq_process->c[r][4]);
-    if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-      for (int k=0; k<32; k++)
-        LOG_D(PHY,"output decoder [%d] =  0x%02x \n", k, harq_process->c[r][k]);
-
-    stop_meas(dlsch_turbo_decoding_stats);
-  }
-
-
-  if ((err_flag == 0) && (ret>=(1+dlsch->max_ldpc_iterations))) {// a Code segment is in error so break;
-    LOG_D(PHY,"AbsSubframe %d.%d CRC failed, segment %d/%d \n",frame%1024,nr_slot_rx,r,harq_process->C-1);
-    err_flag = 1;
-  }
-
-  //} //loop r
-
-  if (err_flag == 1) {
-    if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-      LOG_I(PHY,"[UE %d] DLSCH: Setting NAK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d) Kr %d r %d harq_process->round %d\n",
-            phy_vars_ue->Mod_id, frame, nr_slot_rx, harq_pid,harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs,Kr,r,harq_process->round);
-    harq_process->ack = 0;
-    harq_process->errors[harq_process->round]++;
-    harq_process->round++;
-
-    if (harq_process->round >= dlsch->Mlimit) {
-      harq_process->status = SCH_IDLE;
-      harq_process->round  = 0;
-    }
-
-    if(is_crnti) {
-      LOG_D(PHY,"[UE %d] DLSCH: Setting NACK for nr_slot_rx %d (pid %d, pid status %d, round %d/Max %d, TBS %d)\n",
-            phy_vars_ue->Mod_id,nr_slot_rx,harq_pid,harq_process->status,harq_process->round,dlsch->Mlimit,harq_process->TBS);
-    }
-
-    return((1+dlsch->max_ldpc_iterations));
-  } else {
-    if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-      LOG_I(PHY,"[UE %d] DLSCH: Setting ACK for nr_slot_rx %d TBS %d mcs %d nb_rb %d\n",
-            phy_vars_ue->Mod_id,nr_slot_rx,harq_process->TBS,harq_process->mcs,harq_process->nb_rb);
-
-    harq_process->status = SCH_IDLE;
-    harq_process->round  = 0;
-    harq_process->ack = 1;
-    //LOG_I(PHY,"[UE %d] DLSCH: Setting ACK for SFN/SF %d/%d (pid %d, status %d, round %d, TBS %d, mcs %d)\n",
-    //  phy_vars_ue->Mod_id, frame, subframe, harq_pid, harq_process->status, harq_process->round,harq_process->TBS,harq_process->mcs);
-
-    if(is_crnti) {
-      LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for nr_slot_rx %d (pid %d, round %d, TBS %d)\n",phy_vars_ue->Mod_id,nr_slot_rx,harq_pid,harq_process->round,harq_process->TBS);
-    }
-
-    //LOG_D(PHY,"[UE %d] DLSCH: Setting ACK for subframe %d (pid %d, round %d)\n",phy_vars_ue->Mod_id,subframe,harq_pid,harq_process->round);
-  }
-
-  // Reassembly of Transport block here
   offset = 0;
-  /*
-  printf("harq_pid %d\n",harq_pid);
-  printf("F %d, Fbytes %d\n",harq_process->F,harq_process->F>>3);
-  printf("C %d\n",harq_process->C);
-  */
-  //uint32_t wait = 0;
-  /* while((proc->decoder_thread_available == 0) )
-  {
-          usleep(1);
-  }
-  proc->decoder_thread_available == 0;*/
-  /*notifiedFIFO_elt_t *res1=tryPullTpool(&nf, Tpool);
-  if (!res1) {
-    printf("mthread trypull null\n");
-    usleep(1);
-    wait++;
-  }*/
-  //usleep(50);
-  proc->decoder_main_available = 0;
-  Kr = harq_process->K; //to check if same K in all segments
-  Kr_bytes = Kr>>3;
-
+  void (*nr_processDLSegment_ptr)(void*) = &nr_processDLSegment;
+  notifiedFIFO_t nf;
+  initNotifiedFIFO(&nf);
   for (r=0; r<harq_process->C; r++) {
-    memcpy(harq_process->b+offset,
-           harq_process->c[r],
-           Kr_bytes- - (harq_process->F>>3) -((harq_process->C>1)?3:0));
+    //printf("start rx segment %d\n",r);
+    E = nr_get_E(G, harq_process->C, harq_process->Qm, harq_process->Nl, r);
+    union ldpcReqUnion id = {.s={dlsch->rnti,frame,nr_slot_rx,0,0}};
+    notifiedFIFO_elt_t *req=newNotifiedFIFO_elt(sizeof(ldpcDecode_ue_t), id.p, &nf, nr_processDLSegment_ptr);
+    ldpcDecode_ue_t * rdata=(ldpcDecode_ue_t *) NotifiedFifoData(req);
+
+    rdata->phy_vars_ue = phy_vars_ue;
+    rdata->harq_process = harq_process;
+    rdata->decoderParms = decParams;
+    rdata->dlsch_llr = dlsch_llr;
+    rdata->Kc = kc;
+    rdata->harq_pid = harq_pid;
+    rdata->segment_r = r;
+    rdata->nbSegments = harq_process->C;
+    rdata->E = E;
+    rdata->A = A;
+    rdata->Qm = harq_process->Qm;
+    rdata->r_offset = r_offset;
+    rdata->Kr_bytes = Kr_bytes;
+    rdata->rv_index = harq_process->rvidx;
+    rdata->Tbslbrm = Tbslbrm;
+    rdata->offset = offset;
+    rdata->dlsch = dlsch;
+    rdata->dlsch_id = 0;
+    pushTpool(&(pool_dl),req);
+    nbDecode++;
+    LOG_D(PHY,"Added a block to decode, in pipe: %d\n",nbDecode);
+    r_offset += E;
     offset += (Kr_bytes - (harq_process->F>>3) - ((harq_process->C>1)?3:0));
-
-    if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD)) {
-      LOG_I(PHY,"Segment %u : Kr= %u bytes\n",r,Kr_bytes);
-      LOG_I(PHY,"copied %d bytes to b sequence (harq_pid %d)\n",
-            (Kr_bytes - (harq_process->F>>3)-((harq_process->C>1)?3:0)),harq_pid);
-      LOG_I(PHY,"b[0] = %p,c[%d] = %p\n",
-            (void *)(uint64_t)(harq_process->b[offset]),
-            harq_process->F>>3,
-            (void *)(uint64_t)(harq_process->c[r]));
-    }
+    //////////////////////////////////////////////////////////////////////////////////////////
   }
-
-  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_SEGMENTATION, VCD_FUNCTION_OUT);
-  dlsch->last_iteration_cnt = ret;
-  //proc->decoder_thread_available = 0;
-  //proc->decoder_main_available = 0;
-  return(ret);
-}
-
-
-void nr_dlsch_decoding_process(void *arg) {
-  nr_rxtx_thread_data_t *rxtxD= (nr_rxtx_thread_data_t *)arg;
-  UE_nr_rxtx_proc_t *proc = &rxtxD->proc;
-  PHY_VARS_NR_UE    *phy_vars_ue   = rxtxD->UE;
-  int llr8_flag1;
-  int32_t no_iteration_ldpc,length_dec;
-  t_nrLDPC_dec_params decParams;
-  t_nrLDPC_dec_params *p_decParams = &decParams;
-  t_nrLDPC_time_stats procTime;
-  t_nrLDPC_time_stats *p_procTime =&procTime ;
-  int8_t llrProcBuf[NR_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
-  t_nrLDPC_procBuf *p_nrLDPC_procBuf;
-  int16_t z [68*384];
-  int8_t l [68*384];
-  //__m128i l;
-  //int16_t inv_d [68*384];
-  //int16_t *p_invd =&inv_d;
-  uint8_t  kc;
-  uint8_t Ilbrm = 1;
-  uint32_t Tbslbrm = 950984;
-  uint16_t nb_rb = 30; //to update
-  double Coderate = 0.0;
-  uint16_t nb_symb_sch = 12;
-  uint8_t nb_re_dmrs = 6;
-  uint16_t length_dmrs = 1;
-  uint32_t i,j;
-  __m128i *pv = (__m128i *)&z;
-  __m128i *pl = (__m128i *)&l;
-  proc->instance_cnt_dlsch_td=-1;
-  //proc->nr_slot_rx = proc->sub_frame_start * frame_parms->slots_per_subframe;
-  proc->decoder_thread_available = 1;
-  time_stats_t *dlsch_rate_unmatching_stats=&phy_vars_ue->dlsch_rate_unmatching_stats;
-  time_stats_t *dlsch_turbo_decoding_stats=&phy_vars_ue->dlsch_turbo_decoding_stats;
-  time_stats_t *dlsch_deinterleaving_stats=&phy_vars_ue->dlsch_deinterleaving_stats;
-
-  uint32_t A,E;
-  uint32_t G;
-  uint32_t ret;
-  uint32_t r,r_offset=0,Kr,Kr_bytes,err_flag=0,K_bits_F;
-  uint8_t crc_type;
-  uint8_t C,Cprime;
-  uint8_t Qm;
-  uint8_t Nl;
-  //uint32_t Er;
-  int eNB_id                = proc->eNB_id;
-  int harq_pid              = proc->harq_pid;
-  llr8_flag1                = proc->llr8_flag;
-  int frame                 = proc->frame_rx;
-  r                     = proc->num_seg;
-  NR_UE_DLSCH_t *dlsch      = phy_vars_ue->dlsch[proc->thread_id][eNB_id][0];
-  NR_DL_UE_HARQ_t *harq_process  = dlsch->harq_processes[harq_pid];
-  short *dlsch_llr        = phy_vars_ue->pdsch_vars[proc->thread_id][eNB_id]->llr[0];
-  p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[r];
-  nb_symb_sch = harq_process->nb_symbols;
-  LOG_D(PHY,"dlsch decoding process frame %d slot %d segment %d r %u nb symb %d \n", frame, proc->nr_slot_rx, proc->num_seg, r, harq_process->nb_symbols);
-  nb_rb = harq_process->nb_rb;
-  harq_process->trials[harq_process->round]++;
-  uint16_t nb_rb_oh = 0; // it was not computed at UE side even before and set to 0 in nr_compute_tbs
-  harq_process->TBS = nr_compute_tbs(harq_process->Qm,harq_process->R,nb_rb,nb_symb_sch,nb_re_dmrs*length_dmrs, nb_rb_oh, 0, harq_process->Nl);
-  A = harq_process->TBS; //2072 for QPSK 1/3
-  ret = dlsch->max_ldpc_iterations;
-  harq_process->G = nr_get_G(nb_rb, nb_symb_sch, nb_re_dmrs, length_dmrs, harq_process->Qm,harq_process->Nl);
-  G = harq_process->G;
-
-  LOG_D(PHY,"DLSCH Decoding process, harq_pid %d TBS %d G %d mcs %d Nl %d nb_symb_sch %d nb_rb %d, Coderate %d\n",harq_pid,A,G, harq_process->mcs, harq_process->Nl, nb_symb_sch,nb_rb,harq_process->R);
-
-  if ((harq_process->R)<1024)
-    Coderate = (float) (harq_process->R) /(float) 1024;
-  else
-    Coderate = (float) (harq_process->R) /(float) 2048;
-
-  if ((A <= 292) || ((A <= NR_MAX_PDSCH_TBS) && (Coderate <= 0.6667)) || Coderate <= 0.25) {
-    p_decParams->BG = 2;
-    kc = 52;
-
-    if (Coderate < 0.3333) {
-      p_decParams->R = 15;
-    } else if (Coderate <0.6667) {
-      p_decParams->R = 13;
-    } else {
-      p_decParams->R = 23;
-    }
-  } else {
-    p_decParams->BG = 1;
-    kc = 68;
-
-    if (Coderate < 0.6667) {
-      p_decParams->R = 13;
-    } else if (Coderate <0.8889) {
-      p_decParams->R = 23;
-    } else {
-      p_decParams->R = 89;
-    }
-  }
-
-  if (harq_process->first_rx == 1) {
-    // This is a new packet, so compute quantities regarding segmentation
-    if (A > NR_MAX_PDSCH_TBS)
-      harq_process->B = A+24;
-    else
-      harq_process->B = A+16;
-
-    nr_segmentation(NULL,
-                    NULL,
-                    harq_process->B,
-                    &harq_process->C,
-                    &harq_process->K,
-                    &harq_process->Z,
-                    &harq_process->F,
-                    p_decParams->BG);
-    p_decParams->Z = harq_process->Z;
+  for (r=0; r<nbDecode; r++) {
+    notifiedFIFO_elt_t *req=pullTpool(&nf, &(pool_dl));
+    bool last = false;
+    if (r == nbDecode - 1)
+      last = true;
+    bool stop = nr_ue_postDecode(phy_vars_ue, req, last, &nf);
+    delNotifiedFIFO_elt(req);
+    if (stop)
+      break;
   }
-  LOG_D(PHY,"round %d Z %d K %d BG %d\n", harq_process->round, p_decParams->Z, harq_process->K, p_decParams->BG);
-  p_decParams->numMaxIter = dlsch->max_ldpc_iterations;
-  p_decParams->outMode= 0;
-  err_flag = 0;
-  opp_enabled=1;
-  Qm= harq_process->Qm;
-  Nl=harq_process->Nl;
-  //r_thread = harq_process->C/2-1;
-  C= harq_process->C;
-  Cprime = C; //assume CBGTI not present
-
-  if (r <= Cprime - ((G/(Nl*Qm))%Cprime) - 1)
-    r_offset = Nl*Qm*(G/(Nl*Qm*Cprime));
-  else
-    r_offset = Nl*Qm*((G/(Nl*Qm*Cprime))+1);
-
-  //for (r=(harq_process->C/2); r<harq_process->C; r++) {
-  //    r=1; //(harq_process->C/2);
-  r_offset = r*r_offset;
-  Kr = harq_process->K;
-  Kr_bytes = Kr>>3;
-  K_bits_F = Kr-harq_process->F;
-  E = nr_get_E(G, harq_process->C, harq_process->Qm, harq_process->Nl, r);
-  start_meas(dlsch_deinterleaving_stats);
-  nr_deinterleaving_ldpc(E,
-                         harq_process->Qm,
-                         harq_process->w[r],
-                         dlsch_llr+r_offset);
-
-  if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-    for (int i =0; i<16; i++)
-      LOG_D(PHY,"rx output thread 0 deinterleaving w[%d]= %d r_offset %u\n", i,harq_process->w[r][i], r_offset);
-
-  stop_meas(dlsch_deinterleaving_stats);
-  start_meas(dlsch_rate_unmatching_stats);
-  if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-    LOG_I(PHY,"HARQ_PID %d Rate Matching Segment %d (coded bits %d,unpunctured/repeated bits %d, TBS %d, mod_order %d, nb_rb %d, Nl %d, rv %d, round %d)...\n",
-          harq_pid,r, G,
-          Kr*3,
-          harq_process->TBS,
-          harq_process->Qm,
-          harq_process->nb_rb,
-          harq_process->Nl,
-          harq_process->rvidx,
-          harq_process->round);
 
-  if (Nl<4)
-    Tbslbrm = nr_compute_tbslbrm(harq_process->mcs_table,nb_rb,Nl);
-  else
-    Tbslbrm = nr_compute_tbslbrm(harq_process->mcs_table,nb_rb,4);
-
-  if (nr_rate_matching_ldpc_rx(Ilbrm,
-                               Tbslbrm,
-                               p_decParams->BG,
-                               p_decParams->Z,
-                               harq_process->d[r],
-                               harq_process->w[r],
-                               harq_process->C,
-                               harq_process->rvidx,
-                               (harq_process->first_rx==1)?1:0,
-                               E,
-                               harq_process->F,
-                               Kr-harq_process->F-2*(p_decParams->Z))==-1) {
-    stop_meas(dlsch_rate_unmatching_stats);
-    LOG_E(PHY,"dlsch_decoding.c: Problem in rate_matching\n");
-    //return(dlsch->max_ldpc_iterations);
-  } else {
-    stop_meas(dlsch_rate_unmatching_stats);
-  }
-
-  if (LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD)) {
-    LOG_D(PHY,"decoder input(segment %u) :",r);
-
-    for (int i=0; i<(3*8*Kr_bytes)+12; i++)
-      LOG_D(PHY,"%d : %d\n",i,harq_process->d[r][i]);
-
-    LOG_D(PHY,"\n");
-  }
-
-  memset(harq_process->c[r],0,Kr_bytes);
-
-  if (harq_process->C == 1) {
-    if (A > NR_MAX_PDSCH_TBS)
-      crc_type = CRC24_A;
-    else
-      crc_type = CRC16;
-
-    length_dec = harq_process->B;
-  } else {
-    crc_type = CRC24_B;
-    length_dec = (harq_process->B+24*harq_process->C)/harq_process->C;
-  }
-
-  if (err_flag == 0) {
-    /*
-            LOG_D(PHY, "LDPC algo Kr=%d cb_cnt=%d C=%d nbRB=%d crc_type %d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d maxIter %d\n",
-                                Kr,r,harq_process->C,harq_process->nb_rb,crc_type,A,harq_process->TBS,
-                                harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round,dlsch->max_ldpc_iterations);
-    */
-    if (llr8_flag1) {
-      AssertFatal (Kr >= 256, "LDPC algo issue Kr=%d cb_cnt=%d C=%d nbRB=%d TBSInput=%d TBSHarq=%d TBSplus24=%d mcs=%d Qm=%d RIV=%d round=%d\n",
-                   Kr,r,harq_process->C,harq_process->nb_rb,A,harq_process->TBS,harq_process->B,harq_process->mcs,harq_process->Qm,harq_process->rvidx,harq_process->round);
-    }
-
-    start_meas(dlsch_turbo_decoding_stats);
-    //      LOG_D(PHY,"AbsSubframe %d.%d Start LDPC segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1);
-    /*
-            for (int cnt =0; cnt < (kc-2)*p_decParams->Z; cnt++){
-                  inv_d[cnt] = (1)*harq_process->d[r][cnt];
-                  }
-    */
-    //set first 2*Z_c bits to zeros
-    memset(&z[0],0,2*harq_process->Z*sizeof(int16_t));
-    //set Filler bits
-    memset((&z[0]+K_bits_F),127,harq_process->F*sizeof(int16_t));
-    //Move coded bits before filler bits
-    memcpy((&z[0]+2*harq_process->Z),harq_process->d[r],(K_bits_F-2*harq_process->Z)*sizeof(int16_t));
-    //skip filler bits
-    memcpy((&z[0]+Kr),harq_process->d[r]+(Kr-2*harq_process->Z),(kc*harq_process->Z-Kr)*sizeof(int16_t));
-
-    //Saturate coded bits before decoding into 8 bits values
-    for (i=0, j=0; j < ((kc*harq_process->Z)>>4)+1;  i+=2, j++) {
-      pl[j] = _mm_packs_epi16(pv[i],pv[i+1]);
-    }
-    p_decParams->block_length=length_dec;
-    nrLDPC_initcall(p_decParams, (int8_t*)&pl[0], llrProcBuf);
-    no_iteration_ldpc = nrLDPC_decoder(p_decParams,
-                                       (int8_t *)&pl[0],
-                                       llrProcBuf,
-                                       p_nrLDPC_procBuf,
-                                       p_procTime);
-
-    // Fixme: correct type is unsigned, but nrLDPC_decoder and all called behind use signed int
-    if (check_crc((uint8_t *)llrProcBuf,length_dec,harq_process->F,crc_type)) {
-      LOG_D(PHY,"Segment %u CRC OK\n",r);
-      ret = 2;
-    } else {
-      LOG_D(PHY,"Segment %u CRC NOK\n",r);
-      ret = 1+dlsch->max_ldpc_iterations;
-    }
-
-    if (no_iteration_ldpc > 10)
-      LOG_D(PHY,"Error number of iteration LPDC %d\n", no_iteration_ldpc);
-
-    for (int m=0; m < Kr>>3; m ++) {
-      harq_process->c[r][m]= (uint8_t) llrProcBuf[m];
-    }
-
-    if ( LOG_DEBUGFLAG(DEBUG_DLSCH_DECOD))
-      for (int k=0; k<2; k++)
-        LOG_D(PHY,"segment 1 output decoder [%d] =  0x%02x \n", k, harq_process->c[r][k]);
-    stop_meas(dlsch_turbo_decoding_stats);
-  }
-
-  if ((err_flag == 0) && (ret>=(1+dlsch->max_ldpc_iterations))) {// a Code segment is in error so break;
-    //      LOG_D(PHY,"AbsSubframe %d.%d CRC failed, segment %d/%d \n",frame%1024,subframe,r,harq_process->C-1);
-    err_flag = 1;
-  }
-
-  //}
-  proc->decoder_thread_available = 1;
-  //proc->decoder_main_available = 0;
-}
-
-void *dlsch_thread(void *arg) {
-  //this thread should be over the processing thread to keep in real time
-  notifiedFIFO_t nf;
-  initNotifiedFIFO(&nf);
-  notifiedFIFO_elt_t *res_dl;
-  initNotifiedFIFO_nothreadSafe(&freeBlocks_dl);
-
-  for (int i=0; i<tpool_nbthreads(pool_dl)+1; i++) {
-    pushNotifiedFIFO_nothreadSafe(&freeBlocks_dl,
-                                  newNotifiedFIFO_elt(sizeof(nr_rxtx_thread_data_t), 0,&nf,nr_dlsch_decoding_process));
-  }
-
-  while (!oai_exit) {
-    notifiedFIFO_elt_t *res;
-
-    while (nbDlProcessing >= tpool_nbthreads(pool_dl)) {
-      if ( (res=tryPullTpool(&nf, &pool_dl)) != NULL ) {
-        //nbDlProcessing--;
-        pushNotifiedFIFO_nothreadSafe(&freeBlocks_dl,res);
-      }
-
-      usleep(200);
-    }
-
-    res_dl=pullTpool(&nf, &pool_dl);
-    nbDlProcessing--;
-    pushNotifiedFIFO_nothreadSafe(&freeBlocks_dl,res_dl);
-    //msgToPush->key=0;
-    //pushTpool(Tpool, msgToPush);
-  } // while !oai_exit
-
-  return NULL;
+  VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_DLSCH_COMBINE_SEG, VCD_FUNCTION_OUT);
+  ret = dlsch->last_iteration_cnt;
+  return(ret);
 }
-
-
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
index 046ca927a6a5dc39b72f52cae9a1c3cfc134b36a..0ab17c11e687c02a8655e08718990e7bdccc0e18 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h
@@ -1087,22 +1087,6 @@ uint8_t nr_ue_pusch_common_procedures(PHY_VARS_NR_UE *UE,
                                       NR_DL_FRAME_PARMS *frame_parms,
                                       uint8_t Nl);
 
-
-
-uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
-                         UE_nr_rxtx_proc_t *proc,
-                         int eNB_id,
-                         short *dlsch_llr,
-                         NR_DL_FRAME_PARMS *frame_parms,
-                         NR_UE_DLSCH_t *dlsch,
-                         NR_DL_UE_HARQ_t *harq_process,
-                         uint32_t frame,
-                         uint16_t nb_symb_sch,
-                         uint8_t nr_slot_rx,
-                         uint8_t harq_pid,
-                         uint8_t is_crnti,
-                         uint8_t llr8_flag);
-
 void *nr_dlsch_decoding_2thread0(void *arg);
 
 void *nr_dlsch_decoding_2thread1(void *arg);
@@ -1750,8 +1734,6 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue,
 int32_t generate_nr_prach(PHY_VARS_NR_UE *ue, uint8_t gNB_id, uint8_t subframe);
 
 void dump_nrdlsch(PHY_VARS_NR_UE *ue,uint8_t gNB_id,uint8_t nr_slot_rx,unsigned int *coded_bits_per_codeword,int round,  unsigned char harq_pid);
-
-void *dlsch_thread(void *arg);
 /**@}*/
 #endif
 
diff --git a/openair1/PHY/defs_nr_UE.h b/openair1/PHY/defs_nr_UE.h
index 027e73ea4a19b25b4325d1103fc34ffd73d9b7cb..69978f55482aa4675ad4600f7f0dacc44d480330 100644
--- a/openair1/PHY/defs_nr_UE.h
+++ b/openair1/PHY/defs_nr_UE.h
@@ -1078,5 +1078,27 @@ typedef struct nr_rxtx_thread_data_s {
   notifiedFIFO_t txFifo;
 }  nr_rxtx_thread_data_t;
 
+typedef struct LDPCDecode_ue_s { // payload of one request pushed to pool_dl (processing function nr_processDLSegment); one per code segment
+  PHY_VARS_NR_UE *phy_vars_ue; // UE PHY context handed over by the decoding caller
+  NR_DL_UE_HARQ_t *harq_process; // HARQ process the segment belongs to
+  t_nrLDPC_dec_params decoderParms; // copy (by value) of the caller's LDPC decoder parameters
+  NR_UE_DLSCH_t *dlsch; // DLSCH being decoded
+  short* dlsch_llr; // LLR buffer passed to the decoding function (shared by all segments)
+  int dlsch_id; // set to 0 by the visible producer
+  int harq_pid; // HARQ process id
+  int rv_index; // harq_process->rvidx at submission time
+  int A; // caller's A value (presumably the transport block size - TODO confirm)
+  int E; // value returned by nr_get_E() for this segment
+  int Kc; // caller's kc value
+  int Qm; // harq_process->Qm at submission time
+  int Kr_bytes; // caller's Kr_bytes value
+  int nbSegments; // harq_process->C
+  int segment_r; // index r of this segment, 0..C-1
+  int r_offset; // sum of E over the preceding segments (presumably this segment's start in dlsch_llr - TODO confirm)
+  int offset; // running byte offset accumulated by the caller over the preceding segments
+  int Tbslbrm; // caller's Tbslbrm value
+  int decodeIterations; // not set by the producer; presumably filled in by the worker - TODO confirm
+} ldpcDecode_ue_t;
+
 #include "SIMULATION/ETH_TRANSPORT/defs.h"
 #endif
diff --git a/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c b/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
index e1b87185bd182394d1fea8cef77d71f1e53d9160..940931694c9ad84f63bc3261fd9fc9721520a124 100644
--- a/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
+++ b/openair1/SCHED_NR_UE/phy_procedures_nr_ue.c
@@ -981,37 +981,23 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
 
    start_meas(&ue->dlsch_decoding_stats[proc->thread_id]);
 
+    ret = nr_dlsch_decoding(ue,
+                            proc,
+                            gNB_id,
+                            pdsch_vars->llr[0],
+                            &ue->frame_parms,
+                            dlsch0,
+                            dlsch0->harq_processes[harq_pid],
+                            frame_rx,
+                            nb_symb_sch,
+                            nr_slot_rx,
+                            harq_pid,
+                            pdsch==PDSCH?1:0,
+                            dlsch0->harq_processes[harq_pid]->TBS>256?1:0);
     if( dlsch_parallel) {
-      ret = nr_dlsch_decoding_mthread(ue,
-                                      proc,
-                                      gNB_id,
-                                      pdsch_vars->llr[0],
-                                      &ue->frame_parms,
-                                      dlsch0,
-                                      dlsch0->harq_processes[harq_pid],
-                                      frame_rx,
-                                      nb_symb_sch,
-                                      nr_slot_rx,
-                                      harq_pid,
-                                      pdsch==PDSCH?1:0,
-                                      dlsch0->harq_processes[harq_pid]->TBS>256?1:0);
-
       LOG_T(PHY,"dlsch decoding is parallelized, ret = %d\n", ret);
     }
     else {
-      ret = nr_dlsch_decoding(ue,
-                              proc,
-                              gNB_id,
-                              pdsch_vars->llr[0],
-                              &ue->frame_parms,
-                              dlsch0,
-                              dlsch0->harq_processes[harq_pid],
-                              frame_rx,
-                              nb_symb_sch,
-                              nr_slot_rx,
-                              harq_pid,
-                              pdsch==PDSCH?1:0,
-                              dlsch0->harq_processes[harq_pid]->TBS>256?1:0);
       LOG_T(PHY,"Sequential dlsch decoding , ret = %d\n", ret);
     }
 
@@ -1083,36 +1069,24 @@ bool nr_ue_dlsch_procedures(PHY_VARS_NR_UE *ue,
       start_meas(&ue->dlsch_decoding_stats[proc->thread_id]);
 
 
+      ret1 = nr_dlsch_decoding(ue,
+                               proc,
+                               gNB_id,
+                               pdsch_vars->llr[1],
+                               &ue->frame_parms,
+                               dlsch1,
+                               dlsch1->harq_processes[harq_pid],
+                               frame_rx,
+                               nb_symb_sch,
+                               nr_slot_rx,
+                               harq_pid,
+                               pdsch==PDSCH?1:0,//proc->decoder_switch,
+                               dlsch1->harq_processes[harq_pid]->TBS>256?1:0);
       if(dlsch_parallel) {
-        ret1 = nr_dlsch_decoding_mthread(ue,
-                                         proc,
-                                         gNB_id,
-                                         pdsch_vars->llr[1],
-                                         &ue->frame_parms,
-                                         dlsch1,
-                                         dlsch1->harq_processes[harq_pid],
-                                         frame_rx,
-                                         nb_symb_sch,
-				         nr_slot_rx,
-                                         harq_pid,
-                                         pdsch==PDSCH?1:0,
-                                         dlsch1->harq_processes[harq_pid]->TBS>256?1:0);
         LOG_T(PHY,"CW dlsch decoding is parallelized, ret1 = %d\n", ret1);
       }
       else {
-        ret1 = nr_dlsch_decoding(ue,
-                                 proc,
-                                 gNB_id,
-                                 pdsch_vars->llr[1],
-                                 &ue->frame_parms,
-                                 dlsch1,
-                                 dlsch1->harq_processes[harq_pid],
-                                 frame_rx,
-                                 nb_symb_sch,
-                                 nr_slot_rx,
-                                 harq_pid,
-                                 pdsch==PDSCH?1:0,//proc->decoder_switch,
-                                 dlsch1->harq_processes[harq_pid]->TBS>256?1:0);
+
         LOG_T(PHY,"CWW sequential dlsch decoding, ret1 = %d\n", ret1);
       }
 
diff --git a/openair1/SIMULATION/NR_PHY/dlschsim.c b/openair1/SIMULATION/NR_PHY/dlschsim.c
index a0e0183fd1f288e2363205254e0ac15a9ad2585a..1d9584b30b1a2709a971bf942546dcd479e98a48 100644
--- a/openair1/SIMULATION/NR_PHY/dlschsim.c
+++ b/openair1/SIMULATION/NR_PHY/dlschsim.c
@@ -109,6 +109,7 @@ int main(int argc, char **argv)
 	int ret;
 	//int run_initial_sync=0;
 	int loglvl = OAILOG_WARNING;
+	uint8_t dlsch_threads = 0;
 	float target_error_rate = 0.01;
         uint64_t SSB_positions=0x01;
 	uint16_t nb_symb_sch = 12;
@@ -138,9 +139,9 @@ int main(int argc, char **argv)
 
 			break;*/
 
-		/*case 'd':
-			frame_type = 1;
-			break;*/
+		case 'd':
+			dlsch_threads = atoi(optarg);
+			break;
 
 		case 'g':
 			switch ((char) *optarg) {
@@ -320,6 +321,7 @@ int main(int argc, char **argv)
 			//printf("-C Generate Calibration information for Abstraction (effective SNR adjustment to remove Pe bias w.r.t. AWGN)\n");
 			//printf("-f Output filename (.txt format) for Pe/SNR results\n");
 			printf("-F Input filename (.txt format) for RX conformance testing\n");
+			printf("-d number of dlsch threads, 0: no dlsch parallelization\n");
 			exit(-1);
 			break;
 		}
@@ -331,6 +333,7 @@ int main(int argc, char **argv)
 
 	if (snr1set == 0)
 		snr1 = snr0 + 10;
+	init_dlsch_tpool(dlsch_threads);
 
 	if (ouput_vcd)
         vcd_signal_dumper_init("/tmp/openair_dump_nr_dlschsim.vcd");
diff --git a/openair1/SIMULATION/NR_PHY/dlsim.c b/openair1/SIMULATION/NR_PHY/dlsim.c
index 51fdfc917e6d5bdef2c953de54efb29ce09c8f98..1eb6027936426b9f95a8e984f2288c77b7ecd374 100644
--- a/openair1/SIMULATION/NR_PHY/dlsim.c
+++ b/openair1/SIMULATION/NR_PHY/dlsim.c
@@ -688,7 +688,7 @@ int main(int argc, char **argv)
 
   if (snr1set==0)
     snr1 = snr0+10;
-
+  init_dlsch_tpool(dlsch_threads);
 
 
   RC.gNB = (PHY_VARS_gNB**) malloc(sizeof(PHY_VARS_gNB *));
@@ -980,11 +980,7 @@ int main(int argc, char **argv)
   reset_meas(&msgDataTx->phy_proc_tx);
   gNB->phy_proc_tx_0 = &msgDataTx->phy_proc_tx;
   pushTpool(gNB->threadPool,msgL1Tx);
-  if (dlsch_threads ) { 
-    init_dlsch_tpool(dlsch_threads);
-    pthread_t dlsch0_threads;
-    threadCreate(&dlsch0_threads, dlsch_thread, (void *)UE, "DLthread", -1, OAI_PRIORITY_RT_MAX-1);
-  }
+
   for (SNR = snr0; SNR < snr1; SNR += .2) {
 
     varArray_t *table_tx=initVarArray(1000,sizeof(double));
diff --git a/openair1/SIMULATION/NR_PHY/pucchsim.c b/openair1/SIMULATION/NR_PHY/pucchsim.c
index 0aed6d9780476f42f6dc97744ba9388a3d01d6bc..213924e5fd7425509d75ebacf387afb594ea32ef 100644
--- a/openair1/SIMULATION/NR_PHY/pucchsim.c
+++ b/openair1/SIMULATION/NR_PHY/pucchsim.c
@@ -662,14 +662,19 @@ int main(int argc, char **argv)
           if (uci_pdu.sr->sr_indication == 0 || uci_pdu.sr->sr_confidence_level == 1)
             sr_errors+=1;
         }
+        // harq value 0 -> pass
+        // confidence value 0 -> good confidence
+        const int harq_value0 = uci_pdu.harq->harq_list[0].harq_value;
+        const int harq_value1 = uci_pdu.harq->harq_list[1].harq_value;
+        const int confidence_lvl = uci_pdu.harq->harq_confidence_level;
         if(nr_bit>0){
           if (nr_bit==1 && do_DTX == 0)
-            ack_nack_errors+=(actual_payload^uci_pdu.harq->harq_list[0].harq_value);
+            ack_nack_errors+=(actual_payload^(!harq_value0));
           else if (do_DTX == 0)
-            ack_nack_errors+=(((actual_payload&1)^uci_pdu.harq->harq_list[0].harq_value)+((actual_payload>>1)^uci_pdu.harq->harq_list[1].harq_value));
-          else if ((uci_pdu.harq->harq_confidence_level == 0 && uci_pdu.harq->harq_list[0].harq_value == 1) ||
-                         (uci_pdu.harq->harq_confidence_level == 0 && nr_bit == 2 && uci_pdu.harq->harq_list[1].harq_value==1))
-                  ack_nack_errors++;
+            ack_nack_errors+=(((actual_payload&1)^(!harq_value0))+((actual_payload>>1)^(!harq_value1)));
+          else if ((!confidence_lvl && !harq_value0) ||
+                   (!confidence_lvl && nr_bit == 2 && !harq_value1))
+            ack_nack_errors++;
           free(uci_pdu.harq->harq_list);
         }
       }
diff --git a/openair2/LAYER2/NR_MAC_gNB/config.c b/openair2/LAYER2/NR_MAC_gNB/config.c
index cedd0957941e4905cf576e52c4ab356cc5970ba4..1cb3fdb7084eeecd4a479a7dc0b66ec300a70402 100644
--- a/openair2/LAYER2/NR_MAC_gNB/config.c
+++ b/openair2/LAYER2/NR_MAC_gNB/config.c
@@ -610,12 +610,25 @@ int rrc_mac_config_req_gNB(module_id_t Mod_idP,
       UE_info->CellGroup[UE_id] = CellGroup;
       LOG_I(NR_MAC,"Modified UE_id %d/%x with CellGroup\n",UE_id,rnti);
       process_CellGroup(CellGroup,&UE_info->UE_sched_ctrl[UE_id]);
+      const NR_ServingCellConfig_t *servingCellConfig = CellGroup && CellGroup->spCellConfig ? CellGroup->spCellConfig->spCellConfigDedicated : NULL; // spCellConfig may be NULL (it is NULL-checked further down in this branch)
+      NR_UE_sched_ctrl_t *sched_ctrl = &UE_info->UE_sched_ctrl[UE_id];
+      const NR_PDSCH_ServingCellConfig_t *pdsch = servingCellConfig && servingCellConfig->pdsch_ServingCellConfig ? servingCellConfig->pdsch_ServingCellConfig->choice.setup : NULL; // NULL pdsch -> default of 8 DL HARQ processes below
+      if (sched_ctrl->available_dl_harq.len == 0) {
+        // add all available DL HARQ processes for this UE in SA
+        create_dl_harq_list(sched_ctrl, pdsch);
+      }
+      else {
+        const int nrofHARQ = pdsch && pdsch->nrofHARQ_ProcessesForPDSCH ?
+                             get_nrofHARQ_ProcessesForPDSCH(*pdsch->nrofHARQ_ProcessesForPDSCH) : 8;
+        AssertFatal(sched_ctrl->available_dl_harq.len==nrofHARQ,
+                    "Reconfiguration of available harq processes not yet supported\n");
+      }
       // update coreset/searchspace
       void *bwpd = NULL;
       target_ss = NR_SearchSpace__searchSpaceType_PR_common;
-      if ((UE_info->UE_sched_ctrl[UE_id].active_bwp)) {
+      if ((sched_ctrl->active_bwp)) {
         target_ss = NR_SearchSpace__searchSpaceType_PR_ue_Specific;
-        bwpd = (void*)UE_info->UE_sched_ctrl[UE_id].active_bwp->bwp_Dedicated;
+        bwpd = (void*)sched_ctrl->active_bwp->bwp_Dedicated;
       }
       else if (CellGroup->spCellConfig &&
                  CellGroup->spCellConfig->spCellConfigDedicated &&
@@ -623,9 +636,9 @@ int rrc_mac_config_req_gNB(module_id_t Mod_idP,
         target_ss = NR_SearchSpace__searchSpaceType_PR_ue_Specific;
         bwpd = (void*)CellGroup->spCellConfig->spCellConfigDedicated->initialDownlinkBWP;
       }
-      UE_info->UE_sched_ctrl[UE_id].search_space = get_searchspace(scc, bwpd, target_ss);
-      UE_info->UE_sched_ctrl[UE_id].coreset = get_coreset(Mod_idP, scc, bwpd, UE_info->UE_sched_ctrl[UE_id].search_space, target_ss);
-      UE_info->UE_sched_ctrl[UE_id].maxL = 2;
+      sched_ctrl->search_space = get_searchspace(scc, bwpd, target_ss);
+      sched_ctrl->coreset = get_coreset(Mod_idP, scc, bwpd, sched_ctrl->search_space, target_ss);
+      sched_ctrl->maxL = 2;
     }
   }
   VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_RRC_MAC_CONFIG, VCD_FUNCTION_OUT);
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
index 0e01702d77f5ff73b45c85cb764b326a18565dc9..abc007197d93eaf5654b383ad16034ce3459fa17 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_dlsch.c
@@ -1198,6 +1198,9 @@ void nr_schedule_ue_spec(module_id_t module_id,
                   "UE %d mismatch between scheduled TBS and buffered TB for HARQ PID %d\n",
                   UE_id,
                   current_harq_pid);
+
+      T(T_GNB_MAC_RETRANSMISSION_DL_PDU_WITH_DATA, T_INT(module_id), T_INT(CC_id), T_INT(rnti),
+        T_INT(frame), T_INT(slot), T_INT(current_harq_pid), T_INT(harq->round), T_BUFFER(harq->tb, TBS));
     } else { /* initial transmission */
 
       LOG_D(NR_MAC, "[%s] Initial HARQ transmission in %d.%d\n", __FUNCTION__, frame, slot);
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
index 7387bfe698e3de7e7e29878f39e9bf7aa1ee4f11..1c0cb0353e7c97c9ea421e5e7ce84526679d5439 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_primitives.c
@@ -1971,8 +1971,8 @@ int add_new_nr_ue(module_id_t mod_idP, rnti_t rntiP, NR_CellGroupConfig_t *CellG
     if (bwpList) AssertFatal(bwpList->list.count == 1,
 			     "downlinkBWP_ToAddModList has %d BWP!\n",
 			     bwpList->list.count);
-    const int bwp_id = 1;
-    sched_ctrl->active_bwp = bwpList ? bwpList->list.array[bwp_id - 1] : NULL;
+    const int bwp_id = servingCellConfig ? *servingCellConfig->firstActiveDownlinkBWP_Id : 0;
+    sched_ctrl->active_bwp = bwpList && bwp_id > 0 ? bwpList->list.array[bwp_id - 1] : NULL;
     const int target_ss = sched_ctrl->active_bwp ? NR_SearchSpace__searchSpaceType_PR_ue_Specific : NR_SearchSpace__searchSpaceType_PR_common;
     sched_ctrl->search_space = get_searchspace(scc,
                                                sched_ctrl->active_bwp ? sched_ctrl->active_bwp->bwp_Dedicated : NULL,
@@ -1984,23 +1984,20 @@ int add_new_nr_ue(module_id_t mod_idP, rnti_t rntiP, NR_CellGroupConfig_t *CellG
     if (ubwpList) AssertFatal(ubwpList->list.count == 1,
 			      "uplinkBWP_ToAddModList has %d BWP!\n",
 			      ubwpList->list.count);
-    sched_ctrl->active_ubwp = ubwpList ? ubwpList->list.array[bwp_id - 1] : NULL;
+    const int ul_bwp_id = servingCellConfig ? *servingCellConfig->uplinkConfig->firstActiveUplinkBWP_Id : 0;
+    sched_ctrl->active_ubwp = ubwpList && ul_bwp_id > 0 ? ubwpList->list.array[ul_bwp_id - 1] : NULL;
 
     /* get Number of HARQ processes for this UE */
     if (servingCellConfig) AssertFatal(servingCellConfig->pdsch_ServingCellConfig->present == NR_SetupRelease_PDSCH_ServingCellConfig_PR_setup,
 				       "no pdsch-ServingCellConfig found for UE %d\n",
 				       UE_id);
     const NR_PDSCH_ServingCellConfig_t *pdsch = servingCellConfig ? servingCellConfig->pdsch_ServingCellConfig->choice.setup : NULL;
-    const int nrofHARQ = pdsch ? (pdsch->nrofHARQ_ProcessesForPDSCH ?
-				  get_nrofHARQ_ProcessesForPDSCH(*pdsch->nrofHARQ_ProcessesForPDSCH) : 8) : 8;
-    // add all available DL HARQ processes for this UE
-    create_nr_list(&sched_ctrl->available_dl_harq, nrofHARQ);
-    for (int harq = 0; harq < nrofHARQ; harq++)
-      add_tail_nr_list(&sched_ctrl->available_dl_harq, harq);
-    create_nr_list(&sched_ctrl->feedback_dl_harq, nrofHARQ);
-    create_nr_list(&sched_ctrl->retrans_dl_harq, nrofHARQ);
-
+    // add DL HARQ processes for this UE only in NSA
+    // (SA still doesn't have information on nrofHARQ_ProcessesForPDSCH at this stage)
+    if (!get_softmodem_params()->sa)
+      create_dl_harq_list(sched_ctrl,pdsch);
     // add all available UL HARQ processes for this UE
+    // nb of ul harq processes not configurable
     create_nr_list(&sched_ctrl->available_ul_harq, 16);
     for (int harq = 0; harq < 16; harq++)
       add_tail_nr_list(&sched_ctrl->available_ul_harq, harq);
@@ -2020,6 +2017,19 @@ int add_new_nr_ue(module_id_t mod_idP, rnti_t rntiP, NR_CellGroupConfig_t *CellG
   return -1;
 }
 
+/* Create the available/feedback/retransmission DL HARQ lists of sched_ctrl, each sized by the number of DL HARQ processes. */
+void create_dl_harq_list(NR_UE_sched_ctrl_t *sched_ctrl,
+                         const NR_PDSCH_ServingCellConfig_t *pdsch) {
+  const int nrofHARQ = pdsch && pdsch->nrofHARQ_ProcessesForPDSCH ? // pdsch may be NULL and the field is a pointer that may be unset
+                       get_nrofHARQ_ProcessesForPDSCH(*pdsch->nrofHARQ_ProcessesForPDSCH) : 8; // falls back to 8 when either is missing
+  // add all available DL HARQ processes (ids 0 .. nrofHARQ-1) for this UE
+  create_nr_list(&sched_ctrl->available_dl_harq, nrofHARQ);
+  for (int harq = 0; harq < nrofHARQ; harq++)
+    add_tail_nr_list(&sched_ctrl->available_dl_harq, harq);
+  create_nr_list(&sched_ctrl->feedback_dl_harq, nrofHARQ); // created only; no process id is added to it here
+  create_nr_list(&sched_ctrl->retrans_dl_harq, nrofHARQ); // created only; no process id is added to it here
+}
+
 /* hack data to remove UE in the phy */
 int rnti_to_remove[10];
 volatile int rnti_to_remove_count;
diff --git a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_uci.c b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_uci.c
index 3a359552da188a54349b545579902433ea9eac76..c8a49a2243d3b6f25013a412ad7c027f7f3fe4a7 100644
--- a/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_uci.c
+++ b/openair2/LAYER2/NR_MAC_gNB/gNB_scheduler_uci.c
@@ -1099,7 +1099,7 @@ void handle_nr_uci_pucch_0_1(module_id_t mod_id,
       DevAssert(harq->is_waiting);
       const int8_t pid = sched_ctrl->feedback_dl_harq.head;
       remove_front_nr_list(&sched_ctrl->feedback_dl_harq);
-      handle_dl_harq(mod_id, UE_id, pid, harq_value == 1 && harq_confidence == 0);
+      handle_dl_harq(mod_id, UE_id, pid, harq_value == 0 && harq_confidence == 0);
       if (harq_confidence == 1)  UE_info->mac_stats[UE_id].pucch0_DTX++;
     }
   }
diff --git a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
index 62d70f4f2cccac09236f689c187112add61ec855..91221276b06d8ddb95b9d82322f082a0e6305f1f 100644
--- a/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
+++ b/openair2/LAYER2/NR_MAC_gNB/mac_proto.h
@@ -306,6 +306,8 @@ int get_spf(nfapi_nr_config_request_scf_t *cfg);
 
 int to_absslot(nfapi_nr_config_request_scf_t *cfg,int frame,int slot);
 
+int get_nrofHARQ_ProcessesForPDSCH(e_NR_PDSCH_ServingCellConfig__nrofHARQ_ProcessesForPDSCH n);
+
 void nr_get_tbs_dl(nfapi_nr_dl_tti_pdsch_pdu *pdsch_pdu,
 		   int x_overhead,
                    uint8_t numdmrscdmgroupnodata,
@@ -409,6 +411,9 @@ void nr_rx_sdu(const module_id_t gnb_mod_idP,
                const uint8_t ul_cqi,
                const uint16_t rssi);
 
+void create_dl_harq_list(NR_UE_sched_ctrl_t *sched_ctrl,
+                         const NR_PDSCH_ServingCellConfig_t *pdsch);
+
 void handle_nr_ul_harq(const int CC_idP,
                        module_id_t mod_id,
                        frame_t frame,