From 454c552de225f7a1981892825dd49e7a9a0338e4 Mon Sep 17 00:00:00 2001
From: Hongzhi Wang <hongzhi.wang@tcl.com>
Date: Thu, 10 Jan 2019 10:16:36 +0100
Subject: [PATCH] Test passed for multithreading dlsch_decoding

---
 openair1/PHY/INIT/nr_init_ue.c                |  4 +-
 .../NR_TRANSPORT/nr_transport_common_proto.h  |  3 +-
 .../PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c   | 87 ++++++++++++++-----
 .../PHY/NR_UE_TRANSPORT/nr_transport_ue.h     |  4 +-
 4 files changed, 70 insertions(+), 28 deletions(-)

diff --git a/openair1/PHY/INIT/nr_init_ue.c b/openair1/PHY/INIT/nr_init_ue.c
index 1e8f2bc9904..51d881b13e6 100644
--- a/openair1/PHY/INIT/nr_init_ue.c
+++ b/openair1/PHY/INIT/nr_init_ue.c
@@ -942,9 +942,7 @@ void init_nr_ue_transport(PHY_VARS_NR_UE *ue,int abstraction_flag) {
 void phy_init_nr_top(PHY_VARS_NR_UE *ue)
 {
   NR_DL_FRAME_PARMS *frame_parms = &ue->frame_parms;
-  NR_UE_DLSCH_t *dlsch0 = ue->dlsch[0][0][0];
-  dlsch0 =(NR_UE_DLSCH_t *)malloc16(sizeof(NR_UE_DLSCH_t));
-  
+    
   crcTableInit();
 
   init_dfts();
diff --git a/openair1/PHY/NR_TRANSPORT/nr_transport_common_proto.h b/openair1/PHY/NR_TRANSPORT/nr_transport_common_proto.h
index 3c318874ace..b075da4e97f 100644
--- a/openair1/PHY/NR_TRANSPORT/nr_transport_common_proto.h
+++ b/openair1/PHY/NR_TRANSPORT/nr_transport_common_proto.h
@@ -1,3 +1,4 @@
+
 /*
  * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -33,7 +34,7 @@
 #ifndef __NR_TRANSPORT_COMMON_PROTO__H__
 #define __NR_TRANSPORT_COMMON_PROTO__H__
 
-#define MAX_NUM_NR_DLSCH_SEGMENTS 32
+#define MAX_NUM_NR_DLSCH_SEGMENTS 16
 #define MAX_NUM_NR_ULSCH_SEGMENTS MAX_NUM_NR_DLSCH_SEGMENTS
 
 #define MAX_NR_DLSCH_PAYLOAD_BYTES (MAX_NUM_NR_DLSCH_SEGMENTS*1056)
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
index 295673fdde9..26dd0af0dd5 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_decoding.c
@@ -45,8 +45,6 @@
 
 #define OAI_LDPC_MAX_NUM_LLR 27000//26112 // NR_LDPC_NCOL_BG1*NR_LDPC_ZMAX
 
-static int8_t llrProcBuf[OAI_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
-
 static uint64_t nb_total_decod =0;
 static uint64_t nb_error_decod =0;
 
@@ -75,14 +73,19 @@ void free_nr_ue_dlsch(NR_UE_DLSCH_t *dlsch)
             free16(dlsch->harq_processes[i]->d[r],(3*8448)*sizeof(short));
             dlsch->harq_processes[i]->d[r] = NULL;
           }
+        
+        for (r=0; r<(MAX_NUM_NR_DLSCH_SEGMENTS); r++) {
+			if (dlsch->harq_processes[i]->p_nrLDPC_procBuf[r]){
+			  nrLDPC_free_mem(dlsch->harq_processes[i]->p_nrLDPC_procBuf[r]);
+			  dlsch->harq_processes[i]->p_nrLDPC_procBuf[r] = NULL;
+			}
+		}
 
         free16(dlsch->harq_processes[i],sizeof(NR_DL_UE_HARQ_t));
         dlsch->harq_processes[i] = NULL;
       }
     }
-
-    nrLDPC_free_mem(dlsch->p_nrLDPC_procBuf);
-    
+      
     free16(dlsch,sizeof(NR_UE_DLSCH_t));
     dlsch = NULL;
   }
@@ -122,8 +125,7 @@ NR_UE_DLSCH_t *new_nr_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint32_t Nsoft,uint
     dlsch->Mdlharq = Mdlharq;
     dlsch->Nsoft = Nsoft;
     dlsch->max_ldpc_iterations = max_ldpc_iterations;
-    dlsch->p_nrLDPC_procBuf = nrLDPC_init_mem();
-
+ 
     for (i=0; i<Mdlharq; i++) {
       //      printf("new_ue_dlsch: Harq process %d\n",i);
       dlsch->harq_processes[i] = (NR_DL_UE_HARQ_t *)malloc16(sizeof(NR_DL_UE_HARQ_t));
@@ -139,7 +141,8 @@ NR_UE_DLSCH_t *new_nr_ue_dlsch(uint8_t Kmimo,uint8_t Mdlharq,uint32_t Nsoft,uint
           exit_flag=3;
 
         if (abstraction_flag == 0) {
-          for (r=0; r<MAX_NUM_DLSCH_SEGMENTS/bw_scaling; r++) {
+          for (r=0; r<MAX_NUM_NR_DLSCH_SEGMENTS/bw_scaling; r++) { 
+			dlsch->harq_processes[i]->p_nrLDPC_procBuf[r] = nrLDPC_init_mem();
             dlsch->harq_processes[i]->c[r] = (uint8_t*)malloc16(1056);
 
             if (dlsch->harq_processes[i]->c[r])
@@ -218,11 +221,12 @@ uint32_t  nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
   //short dummy_w[MAX_NUM_DLSCH_SEGMENTS][3*(8448+64)];
   uint32_t r,r_offset=0,Kr=8424,Kr_bytes,K_bytes_F,err_flag=0;
   uint8_t crc_type;
+  int8_t llrProcBuf[OAI_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
   t_nrLDPC_dec_params decParams;
   t_nrLDPC_dec_params* p_decParams = &decParams;
   t_nrLDPC_time_stats procTime;
   t_nrLDPC_time_stats* p_procTime =&procTime ;
-  t_nrLDPC_procBuf* p_nrLDPC_procBuf = dlsch->p_nrLDPC_procBuf;
+  t_nrLDPC_procBuf** p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf;
     
   int16_t z [68*384];
   int8_t l [68*384];
@@ -511,7 +515,7 @@ uint32_t  nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
 		no_iteration_ldpc = nrLDPC_decoder(p_decParams,
 						   (int8_t*)&pl[0],
 						   llrProcBuf,
-                           p_nrLDPC_procBuf,
+                           p_nrLDPC_procBuf[r],
                            p_procTime);
 
 		/*
@@ -698,7 +702,9 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
   t_nrLDPC_dec_params* p_decParams = &decParams;
   t_nrLDPC_time_stats procTime;
   t_nrLDPC_time_stats* p_procTime =&procTime ;
-  t_nrLDPC_procBuf* p_nrLDPC_procBuf = dlsch->p_nrLDPC_procBuf;
+  int8_t llrProcBuf[OAI_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
+  t_nrLDPC_procBuf* p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[0];
+
   int16_t z [68*384];
   int8_t l [68*384];
   //__m128i l;
@@ -794,7 +800,7 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
  	kb = harq_process->K/harq_process->Z;
   	  if ( kb==22){
   		  p_decParams->BG = 1;
-  		  p_decParams->R = 89;
+  		  p_decParams->R = 13;
   		  kc = 68;
   	  }
   	  else{
@@ -833,7 +839,7 @@ uint32_t  nr_dlsch_decoding_mthread(PHY_VARS_NR_UE *phy_vars_ue,
     return((1+dlsch->max_ldpc_iterations));
   }
 #ifdef DEBUG_DLSCH_DECODING
-  printf("Segmentation: C %d, Cminus %d, Kminus %d, Kplus %d\n",harq_process->C,harq_process->Cminus,harq_process->Kminus,harq_process->Kplus);
+  printf("Segmentation: C %d, K %d\n",harq_process->C,harq_process->K);
 #endif
 
   opp_enabled=1;
@@ -963,7 +969,9 @@ if (harq_process->C>1) { // wakeup worker if more than 1 segment
      Cby2 = 1;
    }
 
-  for (r=0; r<Cby2; r++) {
+  //for (r=0; r<Cby2; r++) {
+	r = 0;  
+	if (r==0) r_offset =0;
 
     Kr = harq_process->K;
     Kr_bytes = Kr>>3;
@@ -1015,8 +1023,13 @@ if (harq_process->C>1) { // wakeup worker if more than 1 segment
       stop_meas(dlsch_rate_unmatching_stats);
 #endif
     }
-    r_offset += E;
+    //r_offset += E;
     //printf("main thread r_offset %d\n",r_offset);
+ 
+#ifdef DEBUG_DLSCH_DECODING   
+    for (int i =0; i<16; i++)
+             printf("rx output ratematching w[%d]= %d r_offset %d\n", i,harq_process->w[r][i], r_offset);
+#endif
 
     /*
     printf("Subblock deinterleaving, d %p w %p\n",
@@ -1030,6 +1043,11 @@ if (harq_process->C>1) { // wakeup worker if more than 1 segment
         		   harq_process->Qm,
                            harq_process->d[r],
                            harq_process->w[r]);
+#ifdef DEBUG_DLSCH_DECODING                           
+        for (int i =0; i<16; i++)
+              printf("rx output interleaving d[%d]= %d r_offset %d\n", i,harq_process->d[r][i], r_offset);
+#endif
+
 #if UE_TIMING_TRACE
     stop_meas(dlsch_deinterleaving_stats);
 #endif
@@ -1072,6 +1090,10 @@ if (harq_process->C>1) { // wakeup worker if more than 1 segment
         start_meas(dlsch_turbo_decoding_stats);
 #endif
       LOG_D(PHY,"mthread AbsSubframe %d.%d Start turbo segment %d/%d \n",frame%1024,nr_tti_rx,r,harq_process->C-1);
+      
+            for (int cnt =0; cnt < (kc-2)*p_decParams->Z; cnt++){
+            inv_d[cnt] = (1)*harq_process->d[r][cnt];
+            }
 
 		memset(pv,0,2*p_decParams->Z*sizeof(int16_t));
         //memset(pl,0,2*p_decParams->Z*sizeof(int8_t));
@@ -1109,6 +1131,16 @@ if (harq_process->C>1) { // wakeup worker if more than 1 segment
 		else {
 		  ret=2;
 		}
+		
+		if (check_crc(llrProcBuf,harq_process->B,harq_process->F,crc_type)) {
+		  printf("CRC OK\n");
+		  ret = 2;
+		}
+		else {
+		  printf("CRC NOK\n");
+		  ret = 1+dlsch->max_ldpc_iterations;
+		}
+		
 		//if (!nb_total_decod%10000){
 				printf("Error number of iteration LPDC %d %ld/%ld \n", no_iteration_ldpc, nb_error_decod,nb_total_decod);fflush(stdout);
 		//}
@@ -1139,7 +1171,10 @@ if (harq_process->C>1) { // wakeup worker if more than 1 segment
 		//printf("output channel decoder %d %d %d %d %d \n", harq_process->c[r][0], harq_process->c[r][1], harq_process->c[r][2],harq_process->c[r][3], harq_process->c[r][4]);
 
 		 //printf("output decoder %d %d %d %d %d \n", harq_process->c[r][0], harq_process->c[r][1], harq_process->c[r][2],harq_process->c[r][3], harq_process->c[r][4]);
-
+#ifdef DEBUG_DLSCH_DECODING
+	for (int k=0;k<32;k++)
+       		printf("output decoder [%d] =  0x%02x \n", k, harq_process->c[r][k]);
+#endif
 
 #if UE_TIMING_TRACE
       stop_meas(dlsch_turbo_decoding_stats);
@@ -1151,7 +1186,7 @@ if (harq_process->C>1) { // wakeup worker if more than 1 segment
       LOG_D(PHY,"AbsSubframe %d.%d CRC failed, segment %d/%d \n",frame%1024,nr_tti_rx,r,harq_process->C-1);
       err_flag = 1;
     }
-  }
+  //} //loop r
 
   int32_t frame_rx_prev = frame;
   int32_t tti_rx_prev = nr_tti_rx - 1;
@@ -1282,7 +1317,8 @@ void *nr_dlsch_decoding_2thread0(void *arg)
     t_nrLDPC_dec_params* p_decParams = &decParams;
     t_nrLDPC_time_stats procTime;
     t_nrLDPC_time_stats* p_procTime =&procTime ;
-    t_nrLDPC_procBuf* p_nrLDPC_procBuf = dlsch->p_nrLDPC_procBuf;
+    int8_t llrProcBuf[OAI_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
+    t_nrLDPC_procBuf* p_nrLDPC_procBuf; 
     int16_t z [68*384];
     int8_t l [68*384];
     //__m128i l;
@@ -1375,6 +1411,7 @@ void *nr_dlsch_decoding_2thread0(void *arg)
 	        NR_DL_UE_HARQ_t *harq_process  = dlsch->harq_processes[harq_pid];
 	        short *dlsch_llr 				= phy_vars_ue->pdsch_vars[phy_vars_ue->current_thread_id[subframe]][eNB_id]->llr[0];
 	        //printf("2thread0 llr flag %d tdp flag %d\n",llr8_flag1, tdp->llr8_flag);
+	        p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[1];
 
   /*
   if (nb_rb > frame_parms->N_RB_DL) {
@@ -1420,7 +1457,7 @@ void *nr_dlsch_decoding_2thread0(void *arg)
   kb = harq_process->K/harq_process->Z;
 	  if ( kb==22){
 		  p_decParams->BG = 1;
-		  p_decParams->R = 89;
+		  p_decParams->R = 13;
 		  kc = 68;
 	  }
 	  else{
@@ -1533,7 +1570,7 @@ void *nr_dlsch_decoding_2thread0(void *arg)
       stop_meas(dlsch_rate_unmatching_stats);
 #endif
     }
-    r_offset += E;
+    //r_offset += E;
 
     //for (int i =0; i<16; i++)
     //    	printf("rx output ratematching w[%d]= %d r_offset %d\n", i,harq_process->w[r][i], r_offset);
@@ -1644,6 +1681,10 @@ void *nr_dlsch_decoding_2thread0(void *arg)
 
 		      	      	      	      	}
 		     	printf(" \n");*/
+#ifdef DEBUG_DLSCH_DECODING	     	
+	for (int k=0;k<2;k++)
+  		printf("segment 1 output decoder [%d] =  0x%02x \n", k, harq_process->c[r][k]);
+#endif 
 		
 #if UE_TIMING_TRACE
       stop_meas(dlsch_turbo_decoding_stats);
@@ -1789,7 +1830,8 @@ void *nr_dlsch_decoding_2thread1(void *arg)
     t_nrLDPC_dec_params* p_decParams = &decParams;
     t_nrLDPC_time_stats procTime;
     t_nrLDPC_time_stats* p_procTime =&procTime ;
-    t_nrLDPC_procBuf* p_nrLDPC_procBuf = dlsch->p_nrLDPC_procBuf;
+    t_nrLDPC_procBuf* p_nrLDPC_procBuf;
+    int8_t llrProcBuf[OAI_LDPC_MAX_NUM_LLR] __attribute__ ((aligned(32)));
     int16_t z [68*384];
     int8_t l [68*384];
     //__m128i l;
@@ -1883,6 +1925,7 @@ void *nr_dlsch_decoding_2thread1(void *arg)
 	        short *dlsch_llr 				= phy_vars_ue->pdsch_vars[phy_vars_ue->current_thread_id[subframe]][eNB_id]->llr[0];
 	        //printf("2thread0 llr flag %d tdp flag %d\n",llr8_flag1, tdp->llr8_flag);
 	        //printf("2thread1 nr_tti_tx %d subframe %d SF thread id %d r_offset %d\n", proc->nr_tti_rx, subframe, phy_vars_ue->current_thread_id[subframe], r_offset);
+	        p_nrLDPC_procBuf = harq_process->p_nrLDPC_procBuf[2];
 
   /*
   if (nb_rb > frame_parms->N_RB_DL) {
@@ -2044,7 +2087,7 @@ void *nr_dlsch_decoding_2thread1(void *arg)
       stop_meas(dlsch_rate_unmatching_stats);
 #endif
     }
-    r_offset += E;
+    //r_offset += E;
 
     /*
     printf("Subblock deinterleaving, d %p w %p\n",
diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_ue.h b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_ue.h
index c8ef70b2421..b41b23dd32a 100644
--- a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_ue.h
+++ b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_ue.h
@@ -267,6 +267,8 @@ typedef struct {
   double w_abs[MAX_NUM_NR_DLSCH_SEGMENTS][3*8448];
   /// soft bits for each received segment ("d"-sequence)(for definition see 36-212 V8.6 2009-03, p.15)
   int16_t *d[MAX_NUM_NR_DLSCH_SEGMENTS];
+  /// LDPC processing buffers, one per code segment (allocated and freed alongside c[r]/d[r])
+  t_nrLDPC_procBuf* p_nrLDPC_procBuf[MAX_NUM_NR_DLSCH_SEGMENTS];
   /// Number of code segments 
   uint32_t C;
   /// Number of bits in code segments
@@ -352,8 +354,6 @@ typedef struct {
   uint32_t Nsoft;
   /// Maximum number of LDPC iterations
   uint8_t max_ldpc_iterations;
-  /// LDPC processing buffers
-  t_nrLDPC_procBuf* p_nrLDPC_procBuf;
   /// number of iterations used in last turbo decoding
   uint8_t last_iteration_cnt;  
 } NR_UE_DLSCH_t;
-- 
GitLab