Commit d54fe24f authored by knopp

acceleration of dlsch_modulation

parent 2176299a
...@@ -53,7 +53,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -53,7 +53,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
{ {
uint32_t RTC = (D>>5), ND, ND3; uint32_t RTC = (D>>5), ND, ND3;
uint32_t row,col,Kpi,index; uint32_t row,col,Kpi;
uint32_t index3,k,k2; uint32_t index3,k,k2;
#ifdef RM_DEBUG #ifdef RM_DEBUG
uint32_t nulled=0; uint32_t nulled=0;
...@@ -84,7 +84,6 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -84,7 +84,6 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
#ifdef RM_DEBUG #ifdef RM_DEBUG
printf("Col %d\n",col); printf("Col %d\n",col);
#endif #endif
index = bitrev[col];
index3 = bitrev_x3[col];//3*index; index3 = bitrev_x3[col];//3*index;
for (row=0; row<RTC; row++) { for (row=0; row<RTC; row++) {
...@@ -108,10 +107,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) ...@@ -108,10 +107,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w)
#endif #endif
index3+=96; index3+=96;
index+=32; k++;k2+=2;
k++;
k2++;
k2++;
} }
} }
......
...@@ -861,7 +861,7 @@ void dlsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -861,7 +861,7 @@ void dlsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
len_mod4 =len&3; len_mod4 =len&3;
len2=len>>2; // length in quad words (4 REs) len2=len>>2; // length in quad words (4 REs)
len2+=(len_mod4?0:1); len2+=((len_mod4==0)?0:1);
for (i=0; i<len2; i++) { for (i=0; i<len2; i++) {
......
...@@ -56,10 +56,11 @@ void dlsch_scrambling(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -56,10 +56,11 @@ void dlsch_scrambling(LTE_DL_FRAME_PARMS *frame_parms,
uint8_t Ns) uint8_t Ns)
{ {
int i,j,k=0; int i;
// uint8_t reset; // uint8_t reset;
uint32_t x1, x2, s=0; uint32_t x1, x2, s=0;
uint8_t *e=dlsch->harq_processes[dlsch->current_harq_pid]->e; uint8_t *dlsch_e=dlsch->harq_processes[dlsch->current_harq_pid]->e;
uint8_t *e=dlsch_e;
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_SCRAMBLING, VCD_FUNCTION_IN); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_SCRAMBLING, VCD_FUNCTION_IN);
...@@ -78,24 +79,56 @@ void dlsch_scrambling(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -78,24 +79,56 @@ void dlsch_scrambling(LTE_DL_FRAME_PARMS *frame_parms,
for (i=0; i<(1+(G>>5)); i++) { for (i=0; i<(1+(G>>5)); i++) {
for (j=0; j<32; j++,k++) {
#ifdef DEBUG_SCRAMBLING #ifdef DEBUG_SCRAMBLING
printf("scrambling %d : %d => ",k,e[k]); printf("scrambling %d : %d => ",k,e[k]);
#endif #endif
e[k] = (e[k]&1) ^ ((s>>j)&1);
e[0] = (e[0]&1) ^ (s&1);
e[1] = (e[1]&1) ^ ((s>>1)&1);
e[2] = (e[2]&1) ^ ((s>>2)&1);
e[3] = (e[3]&1) ^ ((s>>3)&1);
e[4] = (e[4]&1) ^ ((s>>4)&1);
e[5] = (e[5]&1) ^ ((s>>5)&1);
e[6] = (e[6]&1) ^ ((s>>6)&1);
e[7] = (e[7]&1) ^ ((s>>7)&1);
e[8] = (e[8]&1) ^ ((s>>8)&1);
e[9] = (e[9]&1) ^ ((s>>9)&1);
e[10] = (e[10]&1) ^ ((s>>10)&1);
e[11] = (e[11]&1) ^ ((s>>11)&1);
e[12] = (e[12]&1) ^ ((s>>12)&1);
e[13] = (e[13]&1) ^ ((s>>13)&1);
e[14] = (e[14]&1) ^ ((s>>14)&1);
e[15] = (e[15]&1) ^ ((s>>15)&1);
e[16] = (e[16]&1) ^ ((s>>16)&1);
e[17] = (e[17]&1) ^ ((s>>17)&1);
e[18] = (e[18]&1) ^ ((s>>18)&1);
e[19] = (e[19]&1) ^ ((s>>19)&1);
e[20] = (e[20]&1) ^ ((s>>20)&1);
e[21] = (e[21]&1) ^ ((s>>21)&1);
e[22] = (e[22]&1) ^ ((s>>22)&1);
e[23] = (e[23]&1) ^ ((s>>23)&1);
e[24] = (e[24]&1) ^ ((s>>24)&1);
e[25] = (e[25]&1) ^ ((s>>25)&1);
e[26] = (e[26]&1) ^ ((s>>26)&1);
e[27] = (e[27]&1) ^ ((s>>27)&1);
e[28] = (e[28]&1) ^ ((s>>28)&1);
e[29] = (e[29]&1) ^ ((s>>29)&1);
e[30] = (e[30]&1) ^ ((s>>30)&1);
e[31] = (e[31]&1) ^ ((s>>31)&1);
#ifdef DEBUG_SCRAMBLING #ifdef DEBUG_SCRAMBLING
printf("%d\n",e[k]); printf("%d\n",e[k]);
#endif #endif
}
s = lte_gold_generic(&x1, &x2, 0); s = lte_gold_generic(&x1, &x2, 0);
e += 32;
} }
VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_SCRAMBLING, VCD_FUNCTION_OUT); VCD_SIGNAL_DUMPER_DUMP_FUNCTION_BY_NAME(VCD_SIGNAL_DUMPER_FUNCTIONS_ENB_DLSCH_SCRAMBLING, VCD_FUNCTION_OUT);
} }
void dlsch_unscrambling(LTE_DL_FRAME_PARMS *frame_parms, void dlsch_unscrambling(LTE_DL_FRAME_PARMS *frame_parms,
int mbsfn_flag, int mbsfn_flag,
LTE_UE_DLSCH_t *dlsch, LTE_UE_DLSCH_t *dlsch,
......
...@@ -183,7 +183,9 @@ int32_t allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms, ...@@ -183,7 +183,9 @@ int32_t allocate_REs_in_RB(LTE_DL_FRAME_PARMS *frame_parms,
int16_t *qam_table_s1, int16_t *qam_table_s1,
uint32_t *re_allocated, uint32_t *re_allocated,
uint8_t skip_dc, uint8_t skip_dc,
uint8_t skip_half); uint8_t skip_half,
int *P1_SHIFT,
int *P2_SHIFT);
/** \fn int32_t dlsch_modulation(mod_sym_t **txdataF, /** \fn int32_t dlsch_modulation(mod_sym_t **txdataF,
......
...@@ -85,7 +85,7 @@ int mult_cpx_conj_vector(int16_t *x1, ...@@ -85,7 +85,7 @@ int mult_cpx_conj_vector(int16_t *x1,
// we compute 4 cpx multiply for each loop // we compute 4 cpx multiply for each loop
for(i=0; i<(N>>2); i++) { for(i=0; i<(N>>2); i++) {
#if defined(__x86_64__) || defined(__i386__) #if defined(__x86_64__) || defined(__i386__)
tmp_re = _mm_madd_epi16(*x1_128,*x2_128); tmp_re = _mm_madd_epi16(*x1_128,*x2_128);
tmp_im = _mm_shufflelo_epi16(*x1_128,_MM_SHUFFLE(2,3,0,1)); tmp_im = _mm_shufflelo_epi16(*x1_128,_MM_SHUFFLE(2,3,0,1));
tmp_im = _mm_shufflehi_epi16(tmp_im,_MM_SHUFFLE(2,3,0,1)); tmp_im = _mm_shufflehi_epi16(tmp_im,_MM_SHUFFLE(2,3,0,1));
......
...@@ -105,7 +105,6 @@ static inline void cmacc(__m128i a,__m128i b, __m128i *re32, __m128i *im32) ...@@ -105,7 +105,6 @@ static inline void cmacc(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline)); static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32) __attribute__((always_inline));
static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32) static inline void cmult(__m128i a,__m128i b, __m128i *re32, __m128i *im32)
...@@ -1754,7 +1753,8 @@ int16_t tw64c[96] __attribute__((aligned(16))) = { 0,32767,3212,32609,6393,32137 ...@@ -1754,7 +1753,8 @@ int16_t tw64c[96] __attribute__((aligned(16))) = { 0,32767,3212,32609,6393,32137
#define simdshort_q15_t __m64 #define simdshort_q15_t __m64
#define shiftright_int16(a,shift) _mm_srai_epi16(a,shift) #define shiftright_int16(a,shift) _mm_srai_epi16(a,shift)
#define set1_int16(a) _mm_set1_epi16(a); #define set1_int16(a) _mm_set1_epi16(a);
#define mulhi_int16(a,b) _mm_slli_epi16(_mm_mulhi_epi16(a,b),1); //#define mulhi_int16(a,b) _mm_slli_epi16(_mm_mulhi_epi16(a,b),1);
#define mulhi_int16(a,b) _mm_mulhrs_epi16 (a,b)
#elif defined(__arm__) #elif defined(__arm__)
#define simd_q15_t int16x8_t #define simd_q15_t int16x8_t
#define simdshort_q15_t int16x4_t #define simdshort_q15_t int16x4_t
...@@ -215,7 +215,7 @@ int main(int argc, char **argv) ...@@ -215,7 +215,7 @@ int main(int argc, char **argv)
// void *data; // void *data;
// int ii; // int ii;
// int bler; // int bler;
double blerr[4],uncoded_ber;//,avg_ber; double blerr[4],uncoded_ber,avg_ber;
short *uncoded_ber_bit=NULL; short *uncoded_ber_bit=NULL;
uint8_t N_RB_DL=25,osf=1; uint8_t N_RB_DL=25,osf=1;
frame_t frame_type = FDD; frame_t frame_type = FDD;
...@@ -2623,7 +2623,7 @@ PMI_FEEDBACK: ...@@ -2623,7 +2623,7 @@ PMI_FEEDBACK:
&PHY_vars_eNB->lte_frame_parms, &PHY_vars_eNB->lte_frame_parms,
num_pdcch_symbols, num_pdcch_symbols,
PHY_vars_eNB->dlsch_eNB[k][0], PHY_vars_eNB->dlsch_eNB[k][0],
PHY_vars_eNB->dlsch_eNB[k][1]); (transmission_mode==3)||(transmission_mode==4) ? PHY_vars_eNB->dlsch_eNB[k][1] : NULL);
/* avoid gcc warnings */ /* avoid gcc warnings */
(void)re_allocated; (void)re_allocated;
...@@ -3292,7 +3292,7 @@ PMI_FEEDBACK: ...@@ -3292,7 +3292,7 @@ PMI_FEEDBACK:
PHY_vars_UE->dlsch_ue[0][cw]->harq_processes[PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid]->G = coded_bits_per_codeword; PHY_vars_UE->dlsch_ue[0][cw]->harq_processes[PHY_vars_UE->dlsch_ue[0][cw]->current_harq_pid]->G = coded_bits_per_codeword;
/*
// calculate uncoded BLER // calculate uncoded BLER
uncoded_ber=0; uncoded_ber=0;
for (i=0;i<coded_bits_per_codeword;i++) for (i=0;i<coded_bits_per_codeword;i++)
...@@ -3308,7 +3308,7 @@ PMI_FEEDBACK: ...@@ -3308,7 +3308,7 @@ PMI_FEEDBACK:
if (n_frames==1) if (n_frames==1)
write_output("uncoded_ber_bit.m","uncoded_ber_bit",uncoded_ber_bit,coded_bits_per_codeword,1,0); write_output("uncoded_ber_bit.m","uncoded_ber_bit",uncoded_ber_bit,coded_bits_per_codeword,1,0);
*/
start_meas(&PHY_vars_UE->dlsch_unscrambling_stats); start_meas(&PHY_vars_UE->dlsch_unscrambling_stats);
dlsch_unscrambling(&PHY_vars_UE->lte_frame_parms, dlsch_unscrambling(&PHY_vars_UE->lte_frame_parms,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment