From 1cb484f1b62bd61f6fdf77f25f2e56f192f0552f Mon Sep 17 00:00:00 2001 From: frtabu <francois.taburet@nokia-bell-labs.com> Date: Tue, 22 Jan 2019 22:07:40 +0100 Subject: [PATCH] fix more trivial cppcheck errors and warnings --- common/config/config_cmdline.c | 3 +- nfapi/oai_integration/nfapi_vnf.c | 6 +- openair1/PHY/CODING/3gpplte_sse.c | 298 +++---- .../PHY/CODING/3gpplte_turbo_decoder_sse.c | 531 ++++-------- .../CODING/3gpplte_turbo_decoder_sse_16bit.c | 765 +++++++---------- .../CODING/3gpplte_turbo_decoder_sse_8bit.c | 583 ++++++------- openair1/PHY/CODING/ccoding_byte.c | 127 ++- openair1/PHY/CODING/ccoding_byte_lte.c | 47 +- openair1/PHY/CODING/lte_rate_matching.c | 111 +-- openair1/PHY/CODING/lte_segmentation.c | 40 +- .../lte_dl_channel_estimation.c | 547 +++++------- openair1/PHY/LTE_TRANSPORT/prach.c | 782 +++++++++--------- 12 files changed, 1548 insertions(+), 2292 deletions(-) diff --git a/common/config/config_cmdline.c b/common/config/config_cmdline.c index 95e9805cab1..aebf7824a45 100644 --- a/common/config/config_cmdline.c +++ b/common/config/config_cmdline.c @@ -164,9 +164,10 @@ int processoption(paramdef_t *cfgoptions, char *value) { */ int config_check_unknown_cmdlineopt(char *prefix) { int unknowndetected=0; - char testprefix[CONFIG_MAXOPTLENGTH]=""; + char testprefix[CONFIG_MAXOPTLENGTH]; int finalcheck = 0; + memset(testprefix,0,sizeof(testprefix)); if (prefix != NULL) { if (strcmp(prefix,CONFIG_CHECKALLSECTIONS) == 0) finalcheck = 1; diff --git a/nfapi/oai_integration/nfapi_vnf.c b/nfapi/oai_integration/nfapi_vnf.c index 25463a32890..7dea74b9b85 100644 --- a/nfapi/oai_integration/nfapi_vnf.c +++ b/nfapi/oai_integration/nfapi_vnf.c @@ -270,6 +270,7 @@ int pnf_param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_pnf_param_re for(int i = 0; i < resp->pnf_phy.number_of_phys; ++i) { phy_info phy; + memset(&phy,0,sizeof(phy)); phy.index = resp->pnf_phy.phy[i].phy_config_index; printf("[VNF] (PHY:%d) phy_config_idx:%d\n", i, 
resp->pnf_phy.phy[i].phy_config_index); @@ -287,6 +288,7 @@ int pnf_param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_pnf_param_re for(int i = 0; i < resp->pnf_rf.number_of_rfs; ++i) { rf_info rf; + memset(&rf,0,sizeof(rf)); rf.index = resp->pnf_rf.rf[i].rf_config_index; printf("[VNF] (RF:%d) rf_config_idx:%d\n", i, resp->pnf_rf.rf[i].rf_config_index); @@ -897,7 +899,7 @@ int param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_param_response_t // for now just 1 - printf("[VNF] %d.%d pnf p7 %s:%d timing %d %d %d %d\n", p5_idx, phy->id, phy->remote_addr, phy->remote_port, p7_vnf->timing_window, p7_vnf->periodic_timing_period, p7_vnf->aperiodic_timing_enabled, p7_vnf->periodic_timing_period); + printf("[VNF] %d.%d pnf p7 %s:%d timing %u %u %u %u\n", p5_idx, phy->id, phy->remote_addr, phy->remote_port, p7_vnf->timing_window, p7_vnf->periodic_timing_period, p7_vnf->aperiodic_timing_enabled, p7_vnf->periodic_timing_period); req->header.message_id = NFAPI_CONFIG_REQUEST; req->header.phy_id = phy->id; @@ -919,7 +921,7 @@ int param_resp_cb(nfapi_vnf_config_t* config, int p5_idx, nfapi_param_response_t req->nfapi_config.timing_window.tl.tag = NFAPI_NFAPI_TIMING_WINDOW_TAG; req->nfapi_config.timing_window.value = p7_vnf->timing_window; - printf("[VNF] Timing window:%d\n", p7_vnf->timing_window); + printf("[VNF] Timing window:%u\n", p7_vnf->timing_window); req->num_tlv++; if(p7_vnf->periodic_timing_enabled || p7_vnf->aperiodic_timing_enabled) { diff --git a/openair1/PHY/CODING/3gpplte_sse.c b/openair1/PHY/CODING/3gpplte_sse.c index 96dbcc37171..2a7b7896504 100644 --- a/openair1/PHY/CODING/3gpplte_sse.c +++ b/openair1/PHY/CODING/3gpplte_sse.c @@ -26,9 +26,9 @@ date: 09.2012 */ #ifndef TC_MAIN -#include "coding_defs.h" + #include "coding_defs.h" #else -#include <stdint.h> + #include <stdint.h> #endif #include <stdio.h> #include <string.h> @@ -66,11 +66,11 @@ struct treillis { union { uint8x8_t systematic_andp1_64[3]; char systematic_andp1_8[24]; - 
}__attribute__((aligned(64))); + } __attribute__((aligned(64))); union { uint8x8_t parity2_64[3]; char parity2_8[24]; - }__attribute__((aligned(64))); + } __attribute__((aligned(64))); int exit_state; }; #endif @@ -79,23 +79,20 @@ struct treillis all_treillis[8][256]; int all_treillis_initialized=0; -static inline unsigned char threegpplte_rsc(unsigned char input,unsigned char *state) -{ +static inline unsigned char threegpplte_rsc(unsigned char input,unsigned char *state) { unsigned char output; output = (input ^ (*state>>2) ^ (*state>>1))&1; *state = (((input<<2)^(*state>>1))^((*state>>1)<<2)^((*state)<<2))&7; return(output); } -static inline void threegpplte_rsc_termination(unsigned char *x,unsigned char *z,unsigned char *state) -{ +static inline void threegpplte_rsc_termination(unsigned char *x,unsigned char *z,unsigned char *state) { *z = ((*state>>2) ^ (*state)) &1; *x = ((*state) ^ (*state>>1)) &1; *state = (*state)>>1; } -static void treillis_table_init(void) -{ +static void treillis_table_init(void) { //struct treillis t[][]=all_treillis; //t=memalign(16,sizeof(struct treillis)*8*256); int i, j,b; @@ -114,8 +111,8 @@ static void treillis_table_init(void) all_treillis[i][j].systematic_andp1_8[b*3]= (j&(1<<(7-b)))>>(7-b); v=threegpplte_rsc( all_treillis[i][j].systematic_andp1_8[b*3] , ¤t_state); - all_treillis[i][j].systematic_andp1_8[b*3+1]=v; // for the yparity1 - // all_treillis[i][j].parity1_8[b*3+1]=v; // for the yparity1 + all_treillis[i][j].systematic_andp1_8[b*3+1]=v; // for the yparity1 + // all_treillis[i][j].parity1_8[b*3+1]=v; // for the yparity1 all_treillis[i][j].parity2_8[b*3+2]=v; // for the yparity2 } @@ -128,14 +125,12 @@ static void treillis_table_init(void) } -char interleave_compact_byte(short * base_interleaver,unsigned char * input, unsigned char * output, int n) -{ - +char interleave_compact_byte(short *base_interleaver,unsigned char *input, unsigned char *output, int n) { char expandInput[768*8] __attribute__((aligned(32))); int 
i,loop=n>>4; #if defined(__x86_64__) || defined(__i386__) #ifndef __AVX2__ - __m128i *i_128=(__m128i *)input, *o_128=(__m128i*)expandInput; + __m128i *i_128=(__m128i *)input, *o_128=(__m128i *)expandInput; __m128i tmp1, tmp2, tmp3, tmp4; __m128i BIT_MASK = _mm_set_epi8( 0b00000001, 0b00000010, @@ -153,42 +148,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns 0b00100000, 0b01000000, 0b10000000); - #else - __m256i *i_256=(__m256i *)input, *o_256=(__m256i*)expandInput; + __m256i *i_256=(__m256i *)input, *o_256=(__m256i *)expandInput; __m256i tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; __m256i BIT_MASK = _mm256_set_epi8( 0b00000001, - 0b00000010, - 0b00000100, - 0b00001000, - 0b00010000, - 0b00100000, - 0b01000000, - 0b10000000, - 0b00000001, - 0b00000010, - 0b00000100, - 0b00001000, - 0b00010000, - 0b00100000, - 0b01000000, - 0b10000000, - 0b00000001, - 0b00000010, - 0b00000100, - 0b00001000, - 0b00010000, - 0b00100000, - 0b01000000, - 0b10000000, - 0b00000001, - 0b00000010, - 0b00000100, - 0b00001000, - 0b00010000, - 0b00100000, - 0b01000000, - 0b10000000); + 0b00000010, + 0b00000100, + 0b00001000, + 0b00010000, + 0b00100000, + 0b01000000, + 0b10000000, + 0b00000001, + 0b00000010, + 0b00000100, + 0b00001000, + 0b00010000, + 0b00100000, + 0b01000000, + 0b10000000, + 0b00000001, + 0b00000010, + 0b00000100, + 0b00001000, + 0b00010000, + 0b00100000, + 0b01000000, + 0b10000000, + 0b00000001, + 0b00000010, + 0b00000100, + 0b00001000, + 0b00010000, + 0b00100000, + 0b01000000, + 0b10000000); #endif #elif defined(__arm__) uint8x16_t *i_128=(uint8x16_t *)input, *o_128=(uint8x16_t *)expandInput; @@ -196,40 +190,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns uint16x8_t tmp3; uint32x4_t tmp4; uint8x16_t and_tmp; - uint8x16_t BIT_MASK = { 0b10000000, - 0b01000000, - 0b00100000, - 0b00010000, - 0b00001000, - 0b00000100, - 0b00000010, - 0b00000001, - 0b10000000, - 0b01000000, - 0b00100000, - 0b00010000, - 
0b00001000, - 0b00000100, - 0b00000010, - 0b00000001}; + uint8x16_t BIT_MASK = { 0b10000000, + 0b01000000, + 0b00100000, + 0b00010000, + 0b00001000, + 0b00000100, + 0b00000010, + 0b00000001, + 0b10000000, + 0b01000000, + 0b00100000, + 0b00010000, + 0b00001000, + 0b00000100, + 0b00000010, + 0b00000001 + }; #endif - - #ifndef __AVX2__ + if ((n&15) > 0) loop++; + #else loop=n>>5; + if ((n&31) > 0) loop++; -#endif +#endif for (i=0; i<loop ; i++ ) { - // int cur_byte=i<<3; - // for (b=0;b<8;b++) - // expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b); - + // int cur_byte=i<<3; + // for (b=0;b<8;b++) + // expandInput[cur_byte+b] = (input[i]&(1<<(7-b)))>>(7-b); #if defined(__x86_64__) || defined(__i386__) #ifndef __AVX2__ tmp1=_mm_load_si128(i_128++); // tmp1 = B0,B1,...,B15 @@ -237,29 +232,22 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B0,B0,B0,B0,B1,B1,B1,B1,B2,B2,B2,B2,B3,B3,B3,B3 tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B0,B0,B0,B0,B0,B0,B0,B0,B1,B1,B1,B1,B1,B1,B1,B1 *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK); - tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B2,B2,B2,B2,B2,B2,B2,B2,B3,B3,B3,B3,B3,B3,B3,B3 *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; - tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7 tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5 *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; - tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7 *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; - tmp2=_mm_unpackhi_epi8(tmp1,tmp1); // tmp2 = B8,B8,B9,B9,...,B15,B15 tmp3=_mm_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B8,B8,B8,B9,B9,B9,B9,B10,B10,B10,B10,B11,B11,B11,B11 tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B8,B8,B8,B8,B8,B8,B8,B8,B9,B9,B9,B9,B9,B9,B9,B9 
*o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; - tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B10,B10,B10,B10,B10,B10,B10,B10,B11,B11,B11,B11,B11,B11,B11,B11 *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; - tmp3=_mm_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12,B12,B12,B12,B13,B13,B13,B13,B14,B14,B14,B14,B15,B15,B15,B15 tmp4=_mm_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12,B12,B12,B12,B12,B12,B12,B12,B13,B13,B13,B13,B13,B13,B13,B13 *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; - tmp4=_mm_unpackhi_epi32(tmp3,tmp3); // tmp4 = B14,B14,B14,B14,B14,B14,B14,B14,B15,B15,B15,B15,B15,B15,B15,B15 *o_128++=_mm_cmpeq_epi8(_mm_and_si128(tmp4,BIT_MASK),BIT_MASK);; #else @@ -281,7 +269,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns //print_bytes2("out",(uint8_t*)o_256); o_256[4]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);; //print_bytes2("out",(uint8_t*)(o_256+4)); - tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B4,B4,B4,B4,B5,B5,B5,B5,B6,B6,B6,B6,B7,B7,B7,B7,B20,B20,B20,B20,...,B23,B23,B23,B23 tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B4,B4,B4,B4,B4,B4,B4,B4,B5,B5,B5,B5,B5,B5,B5,B5,B20,B20...,B21..,B21 tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 - B6,B6,B6,B6,B6,B6,B6,B6,B7,B7,B7,B7,B7,B7,B7,B7,B22...,B22,B23,...,B23 @@ -297,7 +284,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns //print_bytes2("out",(uint8_t*)(o_256+1)); o_256[5]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);; //print_bytes2("out",(uint8_t*)(o_256+4)); - tmp2=_mm256_unpackhi_epi8(tmp1,tmp1); // tmp2 = B8 B9 B10 B11 B12 B13 B14 B15 B25 B26 B27 B28 B29 B30 B31 tmp3=_mm256_unpacklo_epi16(tmp2,tmp2); // tmp3 = B8,B9,B10,B11,B26,B27,B28,B29 tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 - B8,B9,B26,B27 @@ -314,11 +300,10 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns //print_bytes2("out",(uint8_t*)(o_256+2)); 
o_256[6]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);; //print_bytes2("out",(uint8_t*)(o_256+4)); - tmp3=_mm256_unpackhi_epi16(tmp2,tmp2); // tmp3 = B12 B13 B14 B15 B28 B29 B30 B31 tmp4=_mm256_unpacklo_epi32(tmp3,tmp3); // tmp4 = B12 B13 B28 B29 - tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 = B14 B15 B30 B31 - tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1); // tmp6 = B12 B13 B14 B15 + tmp5=_mm256_unpackhi_epi32(tmp3,tmp3); // tmp5 = B14 B15 B30 B31 + tmp6=_mm256_insertf128_si256(tmp4,_mm256_extracti128_si256(tmp5,0),1); // tmp6 = B12 B13 B14 B15 tmp7=_mm256_insertf128_si256(tmp5,_mm256_extracti128_si256(tmp4,1),0); // tmp7 = B28 B29 B30 B31 //print_bytes2("tmp2",(uint8_t*)&tmp2); //print_bytes2("tmp3",(uint8_t*)&tmp3); @@ -330,48 +315,35 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns //print_bytes2("out",(uint8_t*)(o_256+3)); o_256[7]=_mm256_cmpeq_epi8(_mm256_and_si256(tmp7,BIT_MASK),BIT_MASK);; //print_bytes2("out",(uint8_t*)(o_256+7)); - o_256+=8; #endif #elif defined(__arm__) - tmp1=vld1q_u8((uint8_t*)i_128); + tmp1=vld1q_u8((uint8_t *)i_128); //print_bytes("tmp1:",(uint8_t*)&tmp1); - uint8x16x2_t temp1 = vzipq_u8(tmp1,tmp1); tmp2 = temp1.val[0]; - uint16x8x2_t temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2); tmp3 = temp2.val[0]; - uint32x4x2_t temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3); tmp4 = temp3.val[0]; //print_bytes("tmp4:",(uint8_t*)&tmp4); - *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //1 //print_bytes("o:",(uint8_t*)(o_128-1)); - tmp4 = temp3.val[1]; //print_bytes("tmp4:",(uint8_t*)&tmp4); - *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //2 //print_bytes("o:",(uint8_t*)(o_128-1)); - tmp3 = temp2.val[1]; temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3); tmp4 = temp3.val[0]; //print_bytes("tmp4:",(uint8_t*)&tmp4); - *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //3 
//print_bytes("o:",(uint8_t*)(o_128-1)); - tmp4 = temp3.val[1]; //print_bytes("tmp4:",(uint8_t*)&tmp4); - *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //4 - //and_tmp = vandq_u8((uint8x16_t)tmp4,BIT_MASK); print_bytes("and:",and_tmp); + //and_tmp = vandq_u8((uint8x16_t)tmp4,BIT_MASK); print_bytes("and:",and_tmp); //print_bytes("o:",(uint8_t*)(o_128-1)); - - temp1 = vzipq_u8(tmp1,tmp1); tmp2 = temp1.val[1]; temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2); @@ -379,52 +351,41 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3); tmp4 = temp3.val[0]; //print_bytes("tmp4:",(uint8_t*)&tmp4); - *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //5 //print_bytes("o:",(uint8_t*)(o_128-1)); - tmp4 = temp3.val[1]; //print_bytes("tmp4:",(uint8_t*)&tmp4); - *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //6 //print_bytes("o:",(uint8_t*)(o_128-1)); - - temp2 = vzipq_u16((uint16x8_t)tmp2,(uint16x8_t)tmp2); tmp3 = temp2.val[1]; temp3 = vzipq_u32((uint32x4_t)tmp3,(uint32x4_t)tmp3); tmp4 = temp3.val[0]; //print_bytes("tmp4:",(uint8_t*)&tmp4); - *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //7 //print_bytes("o:",(uint8_t*)(o_128-1)); - tmp4 = temp3.val[1]; //print_bytes("tmp4:",(uint8_t*)&tmp4); - *o_128++=vceqq_u8(vandq_u8((uint8x16_t)tmp4,BIT_MASK),BIT_MASK); //7 //print_bytes("o:",(uint8_t*)(o_128-1)); - i_128++; #endif } - - short * ptr_intl=base_interleaver; + short *ptr_intl=base_interleaver; #if defined(__x86_64) || defined(__i386__) #ifndef __AVX2__ __m128i tmp; - uint16_t *systematic2_ptr=(uint16_t *) output; + uint16_t *systematic2_ptr=(uint16_t *) output; #else __m256i tmp; - uint32_t *systematic2_ptr=(uint32_t *) output; + uint32_t *systematic2_ptr=(uint32_t *) output; #endif #elif defined(__arm__) uint8x16_t tmp; - const uint8_t __attribute__ ((aligned (16))) _Powers[16]= - { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 
4, 8, 16, 32, 64, 128 }; - -// Set the powers of 2 (do it once for all, if applicable) + const uint8_t __attribute__ ((aligned (16))) _Powers[16]= + { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 }; + // Set the powers of 2 (do it once for all, if applicable) uint8x16_t Powers= vld1q_u8(_Powers); uint8_t *systematic2_ptr=(uint8_t *) output; #endif @@ -435,8 +396,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns #endif for ( i=0; i< input_length_words ; i ++ ) { - - #if defined(__x86_64__) || defined(__i386__) #ifndef __AVX2__ tmp=_mm_insert_epi8(tmp,expandInput[*ptr_intl++],7); @@ -465,7 +424,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],2); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],1); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],0); - tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+7); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+6); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+5); @@ -474,7 +432,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+2); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+1); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],8+0); - tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+7); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+6); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+5); @@ -483,7 +440,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+2); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+1); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],16+0); - tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+7); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+6); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+5); @@ -492,7 
+448,6 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+2); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+1); tmp=_mm256_insert_epi8(tmp,expandInput[*ptr_intl++],24+0); - *systematic2_ptr++=(unsigned int)_mm256_movemask_epi8(tmp); #endif #elif defined(__arm__) @@ -512,11 +467,10 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+2); tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+1); tmp=vsetq_lane_u8(expandInput[*ptr_intl++],tmp,8+0); -// Compute the mask from the input + // Compute the mask from the input uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))); vst1q_lane_u8(systematic2_ptr++, (uint8x16_t)Mask, 0); vst1q_lane_u8(systematic2_ptr++, (uint8x16_t)Mask, 8); - #endif } @@ -537,14 +491,12 @@ char interleave_compact_byte(short * base_interleaver,unsigned char * input, uns void threegpplte_turbo_encoder_sse(unsigned char *input, unsigned short input_length_bytes, unsigned char *output, - unsigned char F) -{ - + unsigned char F) { int i; unsigned char *x; unsigned char state0=0,state1=0; unsigned short input_length_bits = input_length_bytes<<3; - short * base_interleaver; + short *base_interleaver; if ( all_treillis_initialized == 0 ) { treillis_table_init(); @@ -560,15 +512,12 @@ void threegpplte_turbo_encoder_sse(unsigned char *input, base_interleaver=il_tb+f1f2mat[i].beg_index; } - unsigned char systematic2[768] __attribute__((aligned(32))); - interleave_compact_byte(base_interleaver,input,systematic2,input_length_bytes); - #if defined(__x86_64__) || defined(__i386__) - __m64 *ptr_output=(__m64*) output; + __m64 *ptr_output=(__m64 *) output; #elif defined(__arm__) - uint8x8_t *ptr_output=(uint8x8_t*)output; + uint8x8_t *ptr_output=(uint8x8_t *)output; #endif unsigned char cur_s1, cur_s2; int code_rate; @@ -582,54 +531,45 @@ void 
threegpplte_turbo_encoder_sse(unsigned char *input, /* *ptr_output++ = _mm_add_pi8(all_treillis[state0][cur_s1].systematic_64[code_rate], _mm_add_pi8(all_treillis[state0][cur_s1].parity1_64[code_rate], - all_treillis[state1][cur_s2].parity2_64[code_rate])); - */ - + all_treillis[state1][cur_s2].parity2_64[code_rate])); + */ *ptr_output++ = _mm_add_pi8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate], - all_treillis[state1][cur_s2].parity2_64[code_rate]); - - + all_treillis[state1][cur_s2].parity2_64[code_rate]); #elif defined(__arm__) - *ptr_output++ = vadd_u8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate], - all_treillis[state0][cur_s1].parity2_64[code_rate]); + *ptr_output++ = vadd_u8(all_treillis[state0][cur_s1].systematic_andp1_64[code_rate], + all_treillis[state0][cur_s1].parity2_64[code_rate]); #endif - } - - state0=all_treillis[state0][cur_s1].exit_state; - state1=all_treillis[state1][cur_s2].exit_state; + } + + state0=all_treillis[state0][cur_s1].exit_state; + state1=all_treillis[state1][cur_s2].exit_state; } x=output+(input_length_bits*3); - // Trellis termination threegpplte_rsc_termination(&x[0],&x[1],&state0); #ifdef DEBUG_TURBO_ENCODER - printf("term: x0 %d, x1 %d, state0 %d\n",x[0],x[1],state0); + printf("term: x0 %u, x1 %u, state0 %d\n",x[0],x[1],state0); #endif //DEBUG_TURBO_ENCODER - threegpplte_rsc_termination(&x[2],&x[3],&state0); #ifdef DEBUG_TURBO_ENCODER - printf("term: x0 %d, x1 %d, state0 %d\n",x[2],x[3],state0); + printf("term: x0 %u, x1 %u, state0 %d\n",x[2],x[3],state0); #endif //DEBUG_TURBO_ENCODER - threegpplte_rsc_termination(&x[4],&x[5],&state0); #ifdef DEBUG_TURBO_ENCODER - printf("term: x0 %d, x1 %d, state0 %d\n",x[4],x[5],state0); + printf("term: x0 %u, x1 %u, state0 %d\n",x[4],x[5],state0); #endif //DEBUG_TURBO_ENCODER - threegpplte_rsc_termination(&x[6],&x[7],&state1); - #ifdef DEBUG_TURBO_ENCODER - printf("term: x0 %d, x1 %d, state1 %d\n",x[6],x[7],state1); + printf("term: x0 %u, x1 %u, state1 
%d\n",x[6],x[7],state1); #endif //DEBUG_TURBO_ENCODER threegpplte_rsc_termination(&x[8],&x[9],&state1); #ifdef DEBUG_TURBO_ENCODER - printf("term: x0 %d, x1 %d, state1 %d\n",x[8],x[9],state1); + printf("term: x0 %u, x1 %u, state1 %d\n",x[8],x[9],state1); #endif //DEBUG_TURBO_ENCODER threegpplte_rsc_termination(&x[10],&x[11],&state1); - #ifdef DEBUG_TURBO_ENCODER - printf("term: x0 %d, x1 %d, state1 %d\n",x[10],x[11],state1); + printf("term: x0 %u, x1 %u, state1 %d\n",x[10],x[11],state1); #endif //DEBUG_TURBO_ENCODER #if defined(__x86_64__) || defined(__i386__) _mm_empty(); @@ -638,32 +578,31 @@ void threegpplte_turbo_encoder_sse(unsigned char *input, } void init_encoder_sse (void) { - treillis_table_init(); + treillis_table_init(); } /* function which will be called by the shared lib loader, to check shared lib version against main exec version. version mismatch no considered as fatal (interfaces not supposed to change) -*/ -int coding_checkbuildver(char * mainexec_buildversion, char ** shlib_buildversion) -{ +*/ +int coding_checkbuildver(char *mainexec_buildversion, char **shlib_buildversion) { #ifndef PACKAGE_VERSION #define PACKAGE_VERSION "standalone built: " __DATE__ __TIME__ #endif - *shlib_buildversion = PACKAGE_VERSION; - if (strcmp(mainexec_buildversion, *shlib_buildversion) != 0) { - fprintf(stderr,"[CODING] shared lib version %s, doesn't match main version %s, compatibility should be checked\n", - mainexec_buildversion,*shlib_buildversion); - } - return 0; + *shlib_buildversion = PACKAGE_VERSION; + + if (strcmp(mainexec_buildversion, *shlib_buildversion) != 0) { + fprintf(stderr,"[CODING] shared lib version %s, doesn't match main version %s, compatibility should be checked\n", + mainexec_buildversion,*shlib_buildversion); + } + + return 0; } #ifdef TC_MAIN -#define INPUT_LENGTH 20 +#define INPUT_LENGTH 20 #define F1 21 #define F2 120 -int main(int argc,char **argv) -{ - +int main(int argc,char **argv) { unsigned char input[INPUT_LENGTH+32],state,state2; 
unsigned char output[12+(3*(INPUT_LENGTH<<3))],x,z; int i; @@ -680,28 +619,27 @@ int main(int argc,char **argv) printf("\n"); for (state=0; state<8; state++) { - state2=state; threegpplte_rsc_termination(&x,&z,&state2); printf("Termination: (%d->%d) : (%d,%d)\n",state,state2,x,z); } - memset((void*)input,0,INPUT_LENGTH+16); + memset((void *)input,0,INPUT_LENGTH+16); + for (i=0; i<INPUT_LENGTH; i++) { input[i] = i*219; - printf("Input %d : %d\n",i,input[i]); + printf("Input %d : %u\n",i,input[i]); } threegpplte_turbo_encoder_sse(&input[0], - INPUT_LENGTH, - &output[0], - 0); + INPUT_LENGTH, + &output[0], + 0); + for (i=0; i<12+(INPUT_LENGTH*24); i++) + printf("%u",output[i]); - for (i=0;i<12+(INPUT_LENGTH*24);i++) - printf("%d",output[i]); printf("\n"); - return(0); } diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c index a02af690770..9d4e6a312a4 100644 --- a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c +++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse.c @@ -38,33 +38,33 @@ #include "PHY/sse_intrin.h" #ifndef TEST_DEBUG -#include "PHY/defs.h" -#include "PHY/CODING/defs.h" -#include "PHY/CODING/lte_interleaver_inline.h" + #include "PHY/defs.h" + #include "PHY/CODING/defs.h" + #include "PHY/CODING/lte_interleaver_inline.h" #else -#include "defs.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> + #include "defs.h" + #include <stdio.h> + #include <stdlib.h> + #include <string.h> #endif #define SHUFFLE16(a,b,c,d,e,f,g,h) _mm_set_epi8(h==-1?-1:h*2+1, \ - h==-1?-1:h*2, \ - g==-1?-1:g*2+1, \ - g==-1?-1:g*2, \ - f==-1?-1:f*2+1, \ - f==-1?-1:f*2, \ - e==-1?-1:e*2+1, \ - e==-1?-1:e*2, \ - d==-1?-1:d*2+1, \ - d==-1?-1:d*2, \ - c==-1?-1:c*2+1, \ - c==-1?-1:c*2, \ - b==-1?-1:b*2+1, \ - b==-1?-1:b*2, \ - a==-1?-1:a*2+1, \ - a==-1?-1:a*2); + h==-1?-1:h*2, \ + g==-1?-1:g*2+1, \ + g==-1?-1:g*2, \ + f==-1?-1:f*2+1, \ + f==-1?-1:f*2, \ + e==-1?-1:e*2+1, \ + e==-1?-1:e*2, \ + d==-1?-1:d*2+1, \ + 
d==-1?-1:d*2, \ + c==-1?-1:c*2+1, \ + c==-1?-1:c*2, \ + b==-1?-1:b*2+1, \ + b==-1?-1:b*2, \ + a==-1?-1:a*2+1, \ + a==-1?-1:a*2); @@ -75,44 +75,40 @@ #ifdef LLR8 -typedef int8_t llr_t; // internal decoder LLR data is 8-bit fixed -typedef int8_t channel_t; -#define MAX 64 + typedef int8_t llr_t; // internal decoder LLR data is 8-bit fixed + typedef int8_t channel_t; + #define MAX 64 #else -typedef int16_t llr_t; // internal decoder LLR data is 16-bit fixed -typedef int16_t channel_t; -#define MAX 256 + typedef int16_t llr_t; // internal decoder LLR data is 16-bit fixed + typedef int16_t channel_t; + #define MAX 256 #endif -void log_map (llr_t* systematic,channel_t* y_parity, llr_t* m11, llr_t* m10, llr_t *alpha, llr_t *beta, llr_t* ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,int offset8_flag, +void log_map (llr_t *systematic,channel_t *y_parity, llr_t *m11, llr_t *m10, llr_t *alpha, llr_t *beta, llr_t *ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,int offset8_flag, time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats); -void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic, channel_t* y_parity, unsigned short frame_length,unsigned char term_flag); -void compute_alpha(llr_t*alpha,llr_t *beta, llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F); -void compute_beta(llr_t*alpha, llr_t* beta,llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F,int offset8_flag); -void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m11,llr_t* m10,llr_t* extrinsic, llr_t* ap, unsigned short frame_length); +void compute_gamma(llr_t *m11,llr_t *m10,llr_t *systematic, channel_t *y_parity, unsigned short frame_length,unsigned char term_flag); +void compute_alpha(llr_t *alpha,llr_t *beta, llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F); +void compute_beta(llr_t *alpha, llr_t *beta,llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char 
F,int offset8_flag); +void compute_ext(llr_t *alpha,llr_t *beta,llr_t *m11,llr_t *m10,llr_t *extrinsic, llr_t *ap, unsigned short frame_length); -void print_bytes(char *s, __m128i *x) -{ - +void print_bytes(char *s, __m128i *x) { int8_t *tempb = (int8_t *)x; - printf("%s : %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s, tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7], tempb[8],tempb[9],tempb[10],tempb[11],tempb[12],tempb[13],tempb[14],tempb[15]); - } -void log_map(llr_t* systematic, - channel_t* y_parity, - llr_t* m11, - llr_t* m10, +void log_map(llr_t *systematic, + channel_t *y_parity, + llr_t *m11, + llr_t *m10, llr_t *alpha, llr_t *beta, - llr_t* ext, + llr_t *ext, unsigned short frame_length, unsigned char term_flag, unsigned char F, @@ -120,13 +116,10 @@ void log_map(llr_t* systematic, time_stats_t *alpha_stats, time_stats_t *beta_stats, time_stats_t *gamma_stats, - time_stats_t *ext_stats) -{ - + time_stats_t *ext_stats) { #ifdef DEBUG_LOGMAP msg("log_map, frame_length %d\n",frame_length); #endif - start_meas(gamma_stats) ; compute_gamma(m11,m10,systematic,y_parity,frame_length,term_flag) ; stop_meas(gamma_stats); @@ -139,19 +132,15 @@ void log_map(llr_t* systematic, start_meas(ext_stats) ; compute_ext(alpha,beta,m11,m10,ext,systematic,frame_length) ; stop_meas(ext_stats); - - } -void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, - unsigned short frame_length,unsigned char term_flag) -{ +void compute_gamma(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity, + unsigned short frame_length,unsigned char term_flag) { int k,K1; __m128i *systematic128 = (__m128i *)systematic; __m128i *y_parity128 = (__m128i *)y_parity; __m128i *m10_128 = (__m128i *)m10; __m128i *m11_128 = (__m128i *)m11; - #ifdef DEBUG_LOGMAP msg("compute_gamma, %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length); #endif @@ -159,7 +148,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* 
y_parity, K1=frame_length>>3; for (k=0; k<K1; k++) { - m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k],y_parity128[k]),1); m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k],y_parity128[k]),1); /* @@ -206,13 +194,11 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, (int16_t)_mm_extract_epi16(m10_128[k],6), (int16_t)_mm_extract_epi16(m10_128[k],7)); */ - } // Termination m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k+term_flag],y_parity128[k]),1); m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k+term_flag],y_parity128[k]),1); - // printf("gamma (term): %d,%d, %d,%d, %d,%d\n",m11[k<<3],m10[k<<3],m11[1+(k<<3)],m10[1+(k<<3)],m11[2+(k<<3)],m10[2+(k<<3)]); #else register __m128i sl,sh,ypl,yph; //K128=_mm_set1_epi8(-128); @@ -231,7 +217,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, // m10_128[k] = _mm_subs_epi8(systematic128[k],y_parity128[k]); // m11_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k],K128),_mm_add_epi8(y_parity128[k],K128)),K128); // m10_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k],K128),_mm_add_epi8(_mm_sign_epi8(y_parity128[k],K128),K128)),K128); - /* printf("gamma %d: s %d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n", k, @@ -309,7 +294,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, } // Termination - sl = _mm_cvtepi8_epi16(systematic128[k+term_flag]); sh = _mm_cvtepi8_epi16(_mm_srli_si128(systematic128[k],8)); ypl = _mm_cvtepi8_epi16(y_parity128[k+term_flag]); @@ -318,7 +302,6 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, _mm_srai_epi16(_mm_adds_epi16(sh,yph),1)); m10_128[k] = _mm_packs_epi16(_mm_srai_epi16(_mm_subs_epi16(sl,ypl),1), _mm_srai_epi16(_mm_subs_epi16(sh,yph),1)); - // m11_128[k] = _mm_adds_epi8(systematic128[k+term_flag],y_parity128[k]); // m10_128[k] = _mm_subs_epi8(systematic128[k+term_flag],y_parity128[k]); // 
m11_128[k] = _mm_sub_epi8(_mm_avg_epu8(_mm_add_epi8(systematic128[k+term_flag],K128),_mm_add_epi8(y_parity128[k],K128)),K128); @@ -383,20 +366,17 @@ void compute_gamma(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, #endif _mm_empty(); _m_empty(); - } #define L 40 -void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned short frame_length,unsigned char F) -{ +void compute_alpha(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F) { int k,l,l2,K1,rerun_flag=0; __m128i *alpha128=(__m128i *)alpha,*alpha_ptr; __m128i a0,a1,a2,a3,a4,a5,a6,a7,*m11p,*m10p; __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; __m128i new0,new1,new2,new3,new4,new5,new6,new7; __m128i alpha_max; - #ifndef LLR8 l2 = L>>3; K1 = (frame_length>>3); @@ -439,19 +419,16 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho } alpha_ptr = &alpha128[0]; - - m11p = (__m128i*)m_11; - m10p = (__m128i*)m_10; + m11p = (__m128i *)m_11; + m10p = (__m128i *)m_10; for (k=0; k<l; k++) { - a1=_mm_load_si128(&alpha_ptr[1]); a3=_mm_load_si128(&alpha_ptr[3]); a5=_mm_load_si128(&alpha_ptr[5]); a7=_mm_load_si128(&alpha_ptr[7]); - m_b0 = _mm_adds_epi16(a1,*m11p); // m11 m_b4 = _mm_subs_epi16(a1,*m11p); // m00=-m11 m_b1 = _mm_subs_epi16(a3,*m10p); // m01=-m10 @@ -460,12 +437,10 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho m_b6 = _mm_subs_epi16(a5,*m10p); // m01=-m10 m_b3 = _mm_subs_epi16(a7,*m11p); // m00=-m11 m_b7 = _mm_adds_epi16(a7,*m11p); // m11 - a0=_mm_load_si128(&alpha_ptr[0]); a2=_mm_load_si128(&alpha_ptr[2]); a4=_mm_load_si128(&alpha_ptr[4]); a6=_mm_load_si128(&alpha_ptr[6]); - new0 = _mm_subs_epi16(a0,*m11p); // m00=-m11 new4 = _mm_adds_epi16(a0,*m11p); // m11 new1 = _mm_adds_epi16(a2,*m10p); // m10 @@ -474,7 +449,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho new6 = _mm_adds_epi16(a4,*m10p); // m10 new3 = _mm_adds_epi16(a6,*m11p); // 
m11 new7 = _mm_subs_epi16(a6,*m11p); // m00=-m11 - a0 = _mm_max_epi16(m_b0,new0); a1 = _mm_max_epi16(m_b1,new1); a2 = _mm_max_epi16(m_b2,new2); @@ -483,7 +457,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho a5 = _mm_max_epi16(m_b5,new5); a6 = _mm_max_epi16(m_b6,new6); a7 = _mm_max_epi16(m_b7,new7); - alpha_max = _mm_max_epi16(a0,a1); alpha_max = _mm_max_epi16(alpha_max,a2); alpha_max = _mm_max_epi16(alpha_max,a3); @@ -491,7 +464,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho alpha_max = _mm_max_epi16(alpha_max,a5); alpha_max = _mm_max_epi16(alpha_max,a6); alpha_max = _mm_max_epi16(alpha_max,a7); - alpha_ptr+=8; m11p++; m10p++; @@ -503,7 +475,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho alpha_ptr[5] = _mm_subs_epi16(a5,alpha_max); alpha_ptr[6] = _mm_subs_epi16(a6,alpha_max); alpha_ptr[7] = _mm_subs_epi16(a7,alpha_max); - } /* @@ -981,9 +952,7 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho */ #else - if (rerun_flag == 0) { - alpha128[0] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,0); alpha128[1] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); alpha128[2] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); @@ -992,8 +961,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho alpha128[5] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); alpha128[6] = _mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); alpha128[7] = 
_mm_set_epi8(-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2,-MAX/2); - - } else { alpha128[0] = _mm_slli_si128(alpha128[(K1<<3)],1); alpha128[1] = _mm_slli_si128(alpha128[1+(K1<<3)],1); @@ -1025,15 +992,12 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho print_bytes("a6:",&alpha_ptr[6]); print_bytes("a7:",&alpha_ptr[7]); */ - - m11p = (__m128i*)m_11; - m10p = (__m128i*)m_10; + m11p = (__m128i *)m_11; + m10p = (__m128i *)m_10; for (k=0; k<l; k++) { - - m_b0 = _mm_adds_epi8(alpha_ptr[1],*m11p); // m11 m_b4 = _mm_subs_epi8(alpha_ptr[1],*m11p); // m00=-m11 m_b1 = _mm_subs_epi8(alpha_ptr[3],*m10p); // m01=-m10 @@ -1042,7 +1006,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho m_b6 = _mm_subs_epi8(alpha_ptr[5],*m10p); // m01=-m10 m_b3 = _mm_subs_epi8(alpha_ptr[7],*m11p); // m00=-m11 m_b7 = _mm_adds_epi8(alpha_ptr[7],*m11p); // m11 - new0 = _mm_subs_epi8(alpha_ptr[0],*m11p); // m00=-m11 new4 = _mm_adds_epi8(alpha_ptr[0],*m11p); // m11 new1 = _mm_adds_epi8(alpha_ptr[2],*m10p); // m10 @@ -1051,7 +1014,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho new6 = _mm_adds_epi8(alpha_ptr[4],*m10p); // m10 new3 = _mm_adds_epi8(alpha_ptr[6],*m11p); // m11 new7 = _mm_subs_epi8(alpha_ptr[6],*m11p); // m00=-m11 - alpha_ptr += 8; m11p++; m10p++; @@ -1063,8 +1025,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho alpha_ptr[5] = _mm_max_epi8(m_b5,new5); alpha_ptr[6] = _mm_max_epi8(m_b6,new6); alpha_ptr[7] = _mm_max_epi8(m_b7,new7); - - // compute and subtract maxima alpha_max = _mm_max_epi8(alpha_ptr[0],alpha_ptr[1]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[2]); @@ -1073,7 +1033,6 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[5]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[6]); alpha_max = 
_mm_max_epi8(alpha_max,alpha_ptr[7]); - alpha_ptr[0] = _mm_subs_epi8(alpha_ptr[0],alpha_max); alpha_ptr[1] = _mm_subs_epi8(alpha_ptr[1],alpha_max); alpha_ptr[2] = _mm_subs_epi8(alpha_ptr[2],alpha_max); @@ -1109,14 +1068,11 @@ void compute_alpha(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sho } -void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned short frame_length,unsigned char F,int offset8_flag) -{ - +void compute_beta(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F,int offset8_flag) { int k,rerun_flag=0; __m128i m11_128,m10_128; __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; __m128i new0,new1,new2,new3,new4,new5,new6,new7; - __m128i *beta128,*alpha128,*beta_ptr; __m128i beta_max; int16_t m11,m10,beta0_16,beta1_16,beta2_16,beta3_16,beta4_16,beta5_16,beta6_16,beta7_16,beta0_2,beta1_2,beta2_2,beta3_2,beta_m; @@ -1124,30 +1080,21 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor #ifdef LLR8 llr_t beta2,beta3,beta4,beta5,beta6,beta7; __m128i beta_16; - #endif - #ifdef DEBUG_LOGMAP msg("compute_beta, %p,%p,%p,%p,framelength %d,F %d\n", beta,m_11,m_10,alpha,frame_length,F); #endif - - // termination for beta initialization - // printf("beta init: offset8 %d\n",offset8_flag); m11=(int16_t)m_11[2+frame_length]; m10=(int16_t)m_10[2+frame_length]; - // printf("m11,m10 %d,%d\n",m11,m10); - beta0 = -m11;//M0T_TERM; beta1 = m11;//M1T_TERM; m11=(int16_t)m_11[1+frame_length]; m10=(int16_t)m_10[1+frame_length]; - // printf("m11,m10 %d,%d\n",m11,m10); - beta0_2 = beta0-m11;//+M0T_TERM; beta1_2 = beta0+m11;//+M1T_TERM; beta2_2 = beta1+m10;//M2T_TERM; @@ -1155,7 +1102,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor m11=(int16_t)m_11[frame_length]; m10=(int16_t)m_10[frame_length]; // printf("m11,m10 %d,%d (%p)\n",m11,m10,m_11+frame_length); - beta0_16 = beta0_2-m11;//+M0T_TERM; beta1_16 = beta0_2+m11;//+M1T_TERM; beta2_16 = 
beta1_2+m10;//+M2T_TERM; @@ -1164,8 +1110,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta5_16 = beta2_2+m10;//+M5T_TERM; beta6_16 = beta3_2+m11;//+M6T_TERM; beta7_16 = beta3_2-m11;//+M7T_TERM; - - beta_m = (beta0_16>beta1_16) ? beta0_16 : beta1_16; beta_m = (beta_m>beta2_16) ? beta_m : beta2_16; beta_m = (beta_m>beta3_16) ? beta_m : beta3_16; @@ -1173,8 +1117,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_m = (beta_m>beta5_16) ? beta_m : beta5_16; beta_m = (beta_m>beta6_16) ? beta_m : beta6_16; beta_m = (beta_m>beta7_16) ? beta_m : beta7_16; - - beta0_16=beta0_16-beta_m; beta1_16=beta1_16-beta_m; beta2_16=beta2_16-beta_m; @@ -1183,7 +1125,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta5_16=beta5_16-beta_m; beta6_16=beta6_16-beta_m; beta7_16=beta7_16-beta_m; - #ifdef LLR8 beta_16 = _mm_set_epi16(beta7_16,beta6_16,beta5_16,beta4_16,beta3_16,beta2_16,beta1_16,beta0_16); beta_16 = _mm_packs_epi16(beta_16,beta_16); @@ -1199,8 +1140,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor #endif for (rerun_flag=0;; rerun_flag=1) { - beta_ptr = (__m128i*)&beta[frame_length<<3]; - alpha128 = (__m128i*)&alpha[0]; + beta_ptr = (__m128i *)&beta[frame_length<<3]; + alpha128 = (__m128i *)&alpha[0]; if (rerun_flag == 0) { #ifndef LLR8 @@ -1223,9 +1164,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_ptr[7] = alpha128[7+(frame_length>>1)]; #endif } else { - beta128 = (__m128i*)&beta[0]; + beta128 = (__m128i *)&beta[0]; #ifndef LLR8 - beta_ptr[0] = _mm_srli_si128(beta128[0],2); beta_ptr[1] = _mm_srli_si128(beta128[1],2); beta_ptr[2] = _mm_srli_si128(beta128[2],2); @@ -1255,7 +1195,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_ptr[5] = _mm_insert_epi16(beta_ptr[5],beta5_16,7); beta_ptr[6] = _mm_insert_epi16(beta_ptr[6],beta6_16,7); 
beta_ptr[7] = _mm_insert_epi16(beta_ptr[7],beta7_16,7); - /* beta[7+(frame_length<<3)] = beta0_16; beta[15+(frame_length<<3)] = beta1_16; @@ -1277,18 +1216,15 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_ptr[6] = _mm_insert_epi8(beta_ptr[6],beta6,15); beta_ptr[7] = _mm_insert_epi8(beta_ptr[7],beta7,15); } else { - } #endif - #ifndef LLR8 int loopval=((rerun_flag==0)?0:((frame_length-L)>>3)); for (k=(frame_length>>3)-1; k>=loopval; k--) { - m11_128=((__m128i*)m_11)[k]; - m10_128=((__m128i*)m_10)[k]; - + m11_128=((__m128i *)m_11)[k]; + m10_128=((__m128i *)m_10)[k]; m_b0 = _mm_adds_epi16(beta_ptr[4],m11_128); //m11 m_b1 = _mm_subs_epi16(beta_ptr[4],m11_128); //m00 m_b2 = _mm_subs_epi16(beta_ptr[5],m10_128); //m01 @@ -1297,7 +1233,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor m_b5 = _mm_subs_epi16(beta_ptr[6],m10_128); //m01 m_b6 = _mm_subs_epi16(beta_ptr[7],m11_128); //m00 m_b7 = _mm_adds_epi16(beta_ptr[7],m11_128); //m11 - new0 = _mm_subs_epi16(beta_ptr[0],m11_128); //m00 new1 = _mm_adds_epi16(beta_ptr[0],m11_128); //m11 new2 = _mm_adds_epi16(beta_ptr[1],m10_128); //m10 @@ -1306,9 +1241,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor new5 = _mm_adds_epi16(beta_ptr[2],m10_128); //m10 new6 = _mm_adds_epi16(beta_ptr[3],m11_128); //m11 new7 = _mm_subs_epi16(beta_ptr[3],m11_128); //m00 - beta_ptr-=8; - beta_ptr[0] = _mm_max_epi16(m_b0,new0); beta_ptr[1] = _mm_max_epi16(m_b1,new1); beta_ptr[2] = _mm_max_epi16(m_b2,new2); @@ -1317,7 +1250,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_ptr[5] = _mm_max_epi16(m_b5,new5); beta_ptr[6] = _mm_max_epi16(m_b6,new6); beta_ptr[7] = _mm_max_epi16(m_b7,new7); - beta_max = _mm_max_epi16(beta_ptr[0],beta_ptr[1]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[2]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[3]); @@ -1325,7 +1257,6 @@ void compute_beta(llr_t* alpha,llr_t* 
beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_max = _mm_max_epi16(beta_max ,beta_ptr[5]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[6]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[7]); - beta_ptr[0] = _mm_subs_epi16(beta_ptr[0],beta_max); beta_ptr[1] = _mm_subs_epi16(beta_ptr[1],beta_max); beta_ptr[2] = _mm_subs_epi16(beta_ptr[2],beta_max); @@ -1334,14 +1265,11 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_ptr[5] = _mm_subs_epi16(beta_ptr[5],beta_max); beta_ptr[6] = _mm_subs_epi16(beta_ptr[6],beta_max); beta_ptr[7] = _mm_subs_epi16(beta_ptr[7],beta_max); - - - } #else #ifdef DEBUG_LOGMAP - printf("beta0 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", + printf("beta0 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", (frame_length>>4), _mm_extract_epi8(beta_ptr[0],0), _mm_extract_epi8(beta_ptr[0],1), @@ -1359,7 +1287,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor _mm_extract_epi8(beta_ptr[0],13), _mm_extract_epi8(beta_ptr[0],14), _mm_extract_epi8(beta_ptr[0],15)); - printf("beta1 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", + printf("beta1 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", (frame_length>>4), _mm_extract_epi8(beta_ptr[1],0), _mm_extract_epi8(beta_ptr[1],1), @@ -1377,7 +1305,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor _mm_extract_epi8(beta_ptr[1],13), _mm_extract_epi8(beta_ptr[1],14), _mm_extract_epi8(beta_ptr[1],15)); - printf("beta2 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", + printf("beta2 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", (frame_length>>4), _mm_extract_epi8(beta_ptr[2],0), _mm_extract_epi8(beta_ptr[2],1), @@ -1395,7 +1323,7 @@ void compute_beta(llr_t* alpha,llr_t* 
beta,llr_t *m_11,llr_t* m_10,unsigned shor _mm_extract_epi8(beta_ptr[2],13), _mm_extract_epi8(beta_ptr[2],14), _mm_extract_epi8(beta_ptr[2],15)); - printf("beta3 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", + printf("beta3 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", (frame_length>>4), _mm_extract_epi8(beta_ptr[3],0), _mm_extract_epi8(beta_ptr[3],1), @@ -1413,7 +1341,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor _mm_extract_epi8(beta_ptr[3],13), _mm_extract_epi8(beta_ptr[3],14), _mm_extract_epi8(beta_ptr[3],15)); - printf("beta4 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", + printf("beta4 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", (frame_length>>4), _mm_extract_epi8(beta_ptr[4],0), _mm_extract_epi8(beta_ptr[4],1), @@ -1431,7 +1359,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor _mm_extract_epi8(beta_ptr[4],13), _mm_extract_epi8(beta_ptr[4],14), _mm_extract_epi8(beta_ptr[4],15)); - printf("beta5 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", + printf("beta5 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", (frame_length>>4), _mm_extract_epi8(beta_ptr[5],0), _mm_extract_epi8(beta_ptr[5],1), @@ -1449,7 +1377,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor _mm_extract_epi8(beta_ptr[5],13), _mm_extract_epi8(beta_ptr[5],14), _mm_extract_epi8(beta_ptr[5],15)); - printf("beta6 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", + printf("beta6 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", (frame_length>>4), _mm_extract_epi8(beta_ptr[6],0), _mm_extract_epi8(beta_ptr[6],1), @@ -1467,7 +1395,7 @@ void compute_beta(llr_t* alpha,llr_t* 
beta,llr_t *m_11,llr_t* m_10,unsigned shor _mm_extract_epi8(beta_ptr[6],13), _mm_extract_epi8(beta_ptr[6],14), _mm_extract_epi8(beta_ptr[6],15)); - printf("beta7 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", + printf("beta7 %u: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", (frame_length>>4), _mm_extract_epi8(beta_ptr[7],0), _mm_extract_epi8(beta_ptr[7],1), @@ -1491,9 +1419,8 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor __m128i zeros=_mm_set1_epi8(0); for (k=(frame_length>>4)-1; k>=loopval; k--) { - - m11_128=((__m128i*)m_11)[k]; - m10_128=((__m128i*)m_10)[k]; + m11_128=((__m128i *)m_11)[k]; + m10_128=((__m128i *)m_10)[k]; /* if ((offset8_flag==1) && (k==((frame_length>>4)-9))) { beta_ptr[0] = _mm_insert_epi8(beta_ptr[0],beta0,15); @@ -1506,9 +1433,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_ptr[7] = _mm_insert_epi8(beta_ptr[7],beta7,15); }*/ // print_bytes("m11:",&m11_128); - - - m_b0 = _mm_adds_epi8(beta_ptr[4],m11_128); //m11 m_b1 = _mm_subs_epi8(beta_ptr[4],m11_128); //m00 m_b2 = _mm_subs_epi8(beta_ptr[5],m10_128); //m01 @@ -1517,7 +1441,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor m_b5 = _mm_subs_epi8(beta_ptr[6],m10_128); //m01 m_b6 = _mm_subs_epi8(beta_ptr[7],m11_128); //m00 m_b7 = _mm_adds_epi8(beta_ptr[7],m11_128); //m11 - new0 = _mm_subs_epi8(beta_ptr[0],m11_128); //m00 new1 = _mm_adds_epi8(beta_ptr[0],m11_128); //m11 new2 = _mm_adds_epi8(beta_ptr[1],m10_128); //m10 @@ -1526,9 +1449,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor new5 = _mm_adds_epi8(beta_ptr[2],m10_128); //m10 new6 = _mm_adds_epi8(beta_ptr[3],m11_128); //m11 new7 = _mm_subs_epi8(beta_ptr[3],m11_128); //m00 - beta_ptr-=8; - beta_ptr[0] = _mm_max_epi8(m_b0,new0); beta_ptr[1] = _mm_max_epi8(m_b1,new1); beta_ptr[2] = _mm_max_epi8(m_b2,new2); @@ -1537,7 
+1458,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_ptr[5] = _mm_max_epi8(m_b5,new5); beta_ptr[6] = _mm_max_epi8(m_b6,new6); beta_ptr[7] = _mm_max_epi8(m_b7,new7); - beta_max = _mm_max_epi8(beta_ptr[0],beta_ptr[1]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[2]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[3]); @@ -1553,7 +1473,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor beta_ptr[5] = _mm_subs_epi8(beta_ptr[5],beta_max); beta_ptr[6] = _mm_subs_epi8(beta_ptr[6],beta_max); beta_ptr[7] = _mm_subs_epi8(beta_ptr[7],beta_max); - /* printf("beta0 %d: %03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d,%03d\n", k, @@ -1700,7 +1619,6 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor _mm_extract_epi8(beta_ptr[7],14), _mm_extract_epi8(beta_ptr[7],15)); */ - } #endif @@ -1713,8 +1631,7 @@ void compute_beta(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned shor _m_empty(); } -void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, llr_t* systematic,unsigned short frame_length) -{ +void compute_ext(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, llr_t *systematic,unsigned short frame_length) { __m128i *alpha128=(__m128i *)alpha; __m128i *beta128=(__m128i *)beta; __m128i *m11_128,*m10_128,*ext_128; @@ -1724,26 +1641,20 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll __m128i m10_1,m10_2,m10_3,m10_4; __m128i m11_1,m11_2,m11_3,m11_4; int k; - // // LLR computation, 8 consequtive bits per loop // - #ifdef DEBUG_LOGMAP msg("compute_ext, %p, %p, %p, %p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length); #endif - alpha_ptr = alpha128; beta_ptr = &beta128[8]; - - #ifndef LLR8 for (k=0; k<(frame_length>>3); k++) { - - m11_128 = (__m128i*)&m_11[k<<3]; - m10_128 = (__m128i*)&m_10[k<<3]; - ext_128 = (__m128i*)&ext[k<<3]; + m11_128 = (__m128i 
*)&m_11[k<<3]; + m10_128 = (__m128i *)&m_10[k<<3]; + ext_128 = (__m128i *)&ext[k<<3]; /* printf("EXT %03d\n",k); print_shorts("a0:",&alpha_ptr[0]); @@ -1809,23 +1720,18 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll m11_1 = _mm_max_epi16(m11_1,m11_2); m11_1 = _mm_max_epi16(m11_1,m11_3); m11_1 = _mm_max_epi16(m11_1,m11_4); - // print_shorts("m11_1:",&m11_1); - m01_1 = _mm_subs_epi16(m01_1,*m10_128); m00_1 = _mm_subs_epi16(m00_1,*m11_128); m10_1 = _mm_adds_epi16(m10_1,*m10_128); m11_1 = _mm_adds_epi16(m11_1,*m11_128); - // print_shorts("m10_1:",&m10_1); // print_shorts("m11_1:",&m11_1); m01_1 = _mm_max_epi16(m01_1,m00_1); m10_1 = _mm_max_epi16(m10_1,m11_1); // print_shorts("m01_1:",&m01_1); // print_shorts("m10_1:",&m10_1); - *ext_128 = _mm_subs_epi16(m10_1,m01_1); - /* print_shorts("ext:",ext_128); print_shorts("m11:",m11_128); @@ -1834,7 +1740,6 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll print_shorts("m01_1:",&m01_1); print_shorts("syst:",systematic_128); */ - alpha_ptr+=8; beta_ptr+=8; } @@ -1842,11 +1747,9 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll #else for (k=0; k<(frame_length>>4); k++) { - - m11_128 = (__m128i*)&m_11[k<<4]; - m10_128 = (__m128i*)&m_10[k<<4]; - ext_128 = (__m128i*)&ext[k<<4]; - + m11_128 = (__m128i *)&m_11[k<<4]; + m10_128 = (__m128i *)&m_10[k<<4]; + ext_128 = (__m128i *)&ext[k<<4]; m00_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m11_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m00_3 = _mm_adds_epi8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; @@ -1863,7 +1766,6 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll m10_2 = _mm_adds_epi8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m01_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; - m01_1 = _mm_max_epi8(m01_1,m01_2); m01_1 
= _mm_max_epi8(m01_1,m01_3); m01_1 = _mm_max_epi8(m01_1,m01_4); @@ -1876,29 +1778,20 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll m11_1 = _mm_max_epi8(m11_1,m11_2); m11_1 = _mm_max_epi8(m11_1,m11_3); m11_1 = _mm_max_epi8(m11_1,m11_4); - - m01_1 = _mm_subs_epi8(m01_1,*m10_128); m00_1 = _mm_subs_epi8(m00_1,*m11_128); m10_1 = _mm_adds_epi8(m10_1,*m10_128); m11_1 = _mm_adds_epi8(m11_1,*m11_128); - - m01_1 = _mm_max_epi8(m01_1,m00_1); m10_1 = _mm_max_epi8(m10_1,m11_1); - - *ext_128 = _mm_subs_epi8(m10_1,m01_1); - alpha_ptr+=8; beta_ptr+=8; } #endif - _mm_empty(); _m_empty(); - } @@ -1906,8 +1799,7 @@ void compute_ext(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, ll //int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8], int *pi2tab[188],*pi5tab[188],*pi4tab[188],*pi6tab[188]; -void free_td() -{ +void free_td() { int ind; for (ind = 0; ind < 188; ind++) { @@ -1918,21 +1810,17 @@ void free_td() } } -void init_td() -{ - +void init_td() { int ind,i,i2,i3,j,n,n2,pi,pi3; - short * base_interleaver; + short *base_interleaver; for (ind=0; ind<188; ind++) { - n = f1f2mat[ind].nb_bits; base_interleaver=il_tb+f1f2mat[ind].beg_index; pi2tab[ind] = malloc((n+8)*sizeof(int)); pi5tab[ind] = malloc((n+8)*sizeof(int)); pi4tab[ind] = malloc((n+8)*sizeof(int)); pi6tab[ind] = malloc((n+8)*sizeof(int)); - #ifdef LLR8 if ((n&15)>0) { @@ -1941,7 +1829,6 @@ void init_td() n2 = n; for (j=0,i=0; i<n2; i++,j+=16) { - if (j>=n2) j-=(n2-1); @@ -1956,10 +1843,8 @@ void init_td() j=i2; for (i3=0; i3<(n>>3); i3++,i++,j+=8) { - // if (j>=n) // j-=(n-1); - pi2tab[ind][i] = j; // printf("pi2[%d] = %d\n",i,j); } @@ -1967,7 +1852,6 @@ void init_td() #endif - for (i=0; i<n2; i++) { pi = base_interleaver[i];//(unsigned int)threegpplte_interleaver(f1,f2,n); pi3 = pi2tab[ind][pi]; @@ -1975,7 +1859,6 @@ void init_td() pi5tab[ind][pi3] = pi2tab[ind][i]; pi6tab[ind][pi] = pi2tab[ind][i]; } - } } @@ -1991,33 +1874,25 @@ unsigned char 
phy_threegpplte_turbo_decoder(short *y, time_stats_t *gamma_stats, time_stats_t *ext_stats, time_stats_t *intl1_stats, - time_stats_t *intl2_stats) -{ - + time_stats_t *intl2_stats) { /* y is a pointer to the input decoded_bytes is a pointer to the decoded output n is the size in bits of the coded block, with the tail */ - int n2; #ifdef LLR8 llr_t y8[3*(n+16)] __attribute__((aligned(16))); #endif - llr_t systematic0[n+16] __attribute__ ((aligned(16))); llr_t systematic1[n+16] __attribute__ ((aligned(16))); llr_t systematic2[n+16] __attribute__ ((aligned(16))); llr_t yparity1[n+16] __attribute__ ((aligned(16))); llr_t yparity2[n+16] __attribute__ ((aligned(16))); - llr_t ext[n+128] __attribute__((aligned(16))); llr_t ext2[n+128] __attribute__((aligned(16))); - llr_t alpha[(n+16)*8] __attribute__ ((aligned(16))); llr_t beta[(n+16)*8] __attribute__ ((aligned(16))); llr_t m11[n+16] __attribute__ ((aligned(16))); llr_t m10[n+16] __attribute__ ((aligned(16))); - - int *pi2_p,*pi4_p,*pi5_p,*pi6_p; llr_t *s,*s1,*s2,*yp1,*yp2,*yp; __m128i *yp128; @@ -2026,12 +1901,10 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, unsigned int crc,oldcrc,crc_len; uint8_t temp; __m128i tmp128[(n+8)>>3]; - __m128i tmp, zeros=_mm_setzero_si128(); #ifdef LLR8 __m128i MAX128=_mm_set1_epi16(MAX/2); #endif - register __m128i tmpe; int offset8_flag=0; @@ -2040,9 +1913,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, return 255; } - start_meas(init_stats); - #ifdef LLR8 if ((n&15)>0) { @@ -2063,21 +1934,21 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, } switch (crc_type) { - case CRC24_A: - case CRC24_B: - crc_len=3; - break; + case CRC24_A: + case CRC24_B: + crc_len=3; + break; - case CRC16: - crc_len=2; - break; + case CRC16: + crc_len=2; + break; - case CRC8: - crc_len=1; - break; + case CRC8: + crc_len=1; + break; - default: - crc_len=3; + default: + crc_len=3; } #ifdef LLR8 @@ -2087,199 +1958,154 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, //((__m128i 
*)y8)[i] = _mm_packs_epi16(((__m128i *)y)[j],((__m128i *)y)[j+1]); } - yp128 = (__m128i*)y8; + yp128 = (__m128i *)y8; #else - yp128 = (__m128i*)y; + yp128 = (__m128i *)y; #endif - - - s = systematic0; s1 = systematic1; s2 = systematic2; yp1 = yparity1; yp2 = yparity2; - - #ifndef LLR8 for (i=0; i<n2; i+=8) { pi2_p = &pi2tab[iind][i]; - j=pi2_p[0]; - - tmpe = _mm_load_si128(yp128); - s[j] = _mm_extract_epi16(tmpe,0); yp1[j] = _mm_extract_epi16(tmpe,1); yp2[j] = _mm_extract_epi16(tmpe,2); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[1]; - s[j] = _mm_extract_epi16(tmpe,3); yp1[j] = _mm_extract_epi16(tmpe,4); yp2[j] = _mm_extract_epi16(tmpe,5); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[2]; - s[j] = _mm_extract_epi16(tmpe,6); yp1[j] = _mm_extract_epi16(tmpe,7); tmpe = _mm_load_si128(&yp128[1]); yp2[j] = _mm_extract_epi16(tmpe,0); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[3]; - s[j] = _mm_extract_epi16(tmpe,1); yp1[j] = _mm_extract_epi16(tmpe,2); yp2[j] = _mm_extract_epi16(tmpe,3); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[4]; - s[j] = _mm_extract_epi16(tmpe,4); yp1[j] = _mm_extract_epi16(tmpe,5); yp2[j] = _mm_extract_epi16(tmpe,6); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[5]; - s[j] = _mm_extract_epi16(tmpe,7); tmpe = _mm_load_si128(&yp128[2]); yp1[j] = _mm_extract_epi16(tmpe,0); yp2[j] = _mm_extract_epi16(tmpe,1); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[6]; - s[j] = _mm_extract_epi16(tmpe,2); yp1[j] = _mm_extract_epi16(tmpe,3); yp2[j] = _mm_extract_epi16(tmpe,4); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[7]; - s[j] = _mm_extract_epi16(tmpe,5); yp1[j] = _mm_extract_epi16(tmpe,6); yp2[j] = _mm_extract_epi16(tmpe,7); // printf("init: j %d, s[j] %d yp1[j] %d 
yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - yp128+=3; - } #else for (i=0; i<n2; i+=16) { pi2_p = &pi2tab[iind][i]; - j=pi2_p[0]; s[j] = _mm_extract_epi8(yp128[0],0); yp1[j] = _mm_extract_epi8(yp128[0],1); yp2[j] = _mm_extract_epi8(yp128[0],2); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[1]; s[j] = _mm_extract_epi8(yp128[0],3); yp1[j] = _mm_extract_epi8(yp128[0],4); yp2[j] = _mm_extract_epi8(yp128[0],5); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[2]; s[j] = _mm_extract_epi8(yp128[0],6); yp1[j] = _mm_extract_epi8(yp128[0],7); yp2[j] = _mm_extract_epi8(yp128[0],8); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[3]; s[j] = _mm_extract_epi8(yp128[0],9); yp1[j] = _mm_extract_epi8(yp128[0],10); yp2[j] = _mm_extract_epi8(yp128[0],11); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[4]; s[j] = _mm_extract_epi8(yp128[0],12); yp1[j] = _mm_extract_epi8(yp128[0],13); yp2[j] = _mm_extract_epi8(yp128[0],14); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[5]; s[j] = _mm_extract_epi8(yp128[0],15); yp1[j] = _mm_extract_epi8(yp128[1],0); yp2[j] = _mm_extract_epi8(yp128[1],1); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[6]; s[j] = _mm_extract_epi8(yp128[1],2); yp1[j] = _mm_extract_epi8(yp128[1],3); yp2[j] = _mm_extract_epi8(yp128[1],4); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[7]; s[j] = _mm_extract_epi8(yp128[1],5); yp1[j] = _mm_extract_epi8(yp128[1],6); yp2[j] = _mm_extract_epi8(yp128[1],7); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[8]; s[j] = _mm_extract_epi8(yp128[1],8); yp1[j] = _mm_extract_epi8(yp128[1],9); yp2[j] = _mm_extract_epi8(yp128[1],10); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[9]; s[j] = 
_mm_extract_epi8(yp128[1],11); yp1[j] = _mm_extract_epi8(yp128[1],12); yp2[j] = _mm_extract_epi8(yp128[1],13); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[10]; s[j] = _mm_extract_epi8(yp128[1],14); yp1[j] = _mm_extract_epi8(yp128[1],15); yp2[j] = _mm_extract_epi8(yp128[2],0); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[11]; s[j] = _mm_extract_epi8(yp128[2],1); yp1[j] = _mm_extract_epi8(yp128[2],2); yp2[j] = _mm_extract_epi8(yp128[2],3); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[12]; s[j] = _mm_extract_epi8(yp128[2],4); yp1[j] = _mm_extract_epi8(yp128[2],5); yp2[j] = _mm_extract_epi8(yp128[2],6); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[13]; s[j] = _mm_extract_epi8(yp128[2],7); yp1[j] = _mm_extract_epi8(yp128[2],8); yp2[j] = _mm_extract_epi8(yp128[2],9); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[14]; s[j] = _mm_extract_epi8(yp128[2],10); yp1[j] = _mm_extract_epi8(yp128[2],11); yp2[j] = _mm_extract_epi8(yp128[2],12); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - j=pi2_p[15]; s[j] = _mm_extract_epi8(yp128[2],13); yp1[j] = _mm_extract_epi8(yp128[2],14); yp2[j] = _mm_extract_epi8(yp128[2],15); // printf("init: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); - yp128+=3; - } #endif - - - yp=(llr_t*)yp128; + yp=(llr_t *)yp128; #ifdef LLR8 if (n2>n) { @@ -2290,7 +2116,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, s1[n+4]=0;s1[n+5]=0;s1[n+6]=0;s1[n+7]=0; s2[n]=0;s2[n+1]=0;s2[n+2]=0;s2[n+3]=0; s2[n+4]=0;s2[n+5]=0;s2[n+6]=0;s2[n+7]=0;*/ - yp=(llr_t*)(y8+n); + yp=(llr_t *)(y8+n); } #endif @@ -2341,68 +2167,55 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, #ifdef DEBUG_LOGMAP msg("\n"); #endif //DEBUG_LOGMAP - stop_meas(init_stats); - // do log_map from first parity bit - 
log_map(systematic0,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); while (iteration_cnt++ < max_iterations) { - #ifdef DEBUG_LOGMAP printf("\n*******************ITERATION %d (n %d), ext %p\n\n",iteration_cnt,n,ext); #endif //DEBUG_LOGMAP - start_meas(intl1_stats); #ifndef LLR8 - pi4_p=pi4tab[iind]; for (i=0; i<(n2>>3); i++) { // steady-state portion - - ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],0); - ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],1); - ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],2); - ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],3); - ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],4); - ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],5); - ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],6); - ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t*)ext)[*pi4_p++],7); + ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],0); + ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],1); + ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],2); + ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],3); + ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],4); + ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],5); + ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],6); + ((__m128i 
*)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],((llr_t *)ext)[*pi4_p++],7); } #else - pi4_p=pi4tab[iind]; for (i=0; i<(n2>>4); i++) { // steady-state portion - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],0); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],1); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],2); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],3); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],4); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],5); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],6); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],7); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],8); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],9); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],10); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],11); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],12); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],13); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],14); - ((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],15); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],0); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],1); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],2); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],3); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],4); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],5); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],6); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],7); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],8); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],9); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],10); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],11); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],12); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],13); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],14); + ((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],15); } #endif - 
stop_meas(intl1_stats); - // do log_map from second parity bit - log_map(systematic2,yparity2,m11,m10,alpha,beta,ext2,n2,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); - - #ifndef LLR8 pi5_p=pi5tab[iind]; @@ -2415,7 +2228,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],5); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],6); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],7); - ((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]); + ((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]); } if (iteration_cnt>1) { @@ -2423,17 +2236,16 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, pi6_p=pi6tab[iind]; for (i=0; i<(n2>>3); i++) { - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],7); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],6); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],5); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],4); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],3); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],2); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],1); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],0); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],7); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],6); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],5); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],4); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],3); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],2); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],1); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],0); tmp=_mm_cmpgt_epi8(_mm_packs_epi16(tmp,zeros),zeros); decoded_bytes[i]=(unsigned char)_mm_movemask_epi8(tmp); - } } @@ -2460,8 +2272,7 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15); 
//decoded_bytes_interl[i]=(uint16_t) _mm_movemask_epi8(_mm_cmpgt_epi8(tmp,zeros)); tmp128[i] = _mm_adds_epi8(((__m128i *)ext2)[i],((__m128i *)systematic2)[i]); - - ((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]); + ((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]); } /* LT modification, something wrong here @@ -2526,41 +2337,40 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len])); switch (crc_type) { - - case CRC24_A: - oldcrc&=0x00ffffff; - crc = crc24a(&decoded_bytes[F>>3], - n-24-F)>>8; - temp=((uint8_t *)&crc)[2]; - ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; - ((uint8_t *)&crc)[0] = temp; - break; - - case CRC24_B: - oldcrc&=0x00ffffff; - crc = crc24b(decoded_bytes, - n-24)>>8; - temp=((uint8_t *)&crc)[2]; - ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; - ((uint8_t *)&crc)[0] = temp; - break; - - case CRC16: - oldcrc&=0x0000ffff; - crc = crc16(decoded_bytes, - n-16)>>16; - break; - - case CRC8: - oldcrc&=0x000000ff; - crc = crc8(decoded_bytes, - n-8)>>24; - break; - - default: - printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); - return(255); - break; + case CRC24_A: + oldcrc&=0x00ffffff; + crc = crc24a(&decoded_bytes[F>>3], + n-24-F)>>8; + temp=((uint8_t *)&crc)[2]; + ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; + ((uint8_t *)&crc)[0] = temp; + break; + + case CRC24_B: + oldcrc&=0x00ffffff; + crc = crc24b(decoded_bytes, + n-24)>>8; + temp=((uint8_t *)&crc)[2]; + ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; + ((uint8_t *)&crc)[0] = temp; + break; + + case CRC16: + oldcrc&=0x0000ffff; + crc = crc16(decoded_bytes, + n-16)>>16; + break; + + case CRC8: + oldcrc&=0x000000ff; + crc = crc8(decoded_bytes, + n-8)>>24; + break; + + default: + printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); + return(255); + break; } stop_meas(intl2_stats); @@ -2573,9 +2383,9 @@ 
unsigned char phy_threegpplte_turbo_decoder(short *y, // do log_map from first parity bit if (iteration_cnt < max_iterations) { log_map(systematic1,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); - __m128i* ext_128=(__m128i*) ext; - __m128i* s1_128=(__m128i*) systematic1; - __m128i* s0_128=(__m128i*) systematic0; + __m128i *ext_128=(__m128i *) ext; + __m128i *s1_128=(__m128i *) systematic1; + __m128i *s0_128=(__m128i *) systematic0; #ifndef LLR8 int myloop=n2>>3; @@ -2601,27 +2411,21 @@ unsigned char phy_threegpplte_turbo_decoder(short *y, #ifdef TEST_DEBUG -int test_logmap8() -{ +int test_logmap8() { unsigned char test[8]; //_declspec(align(16)) char channel_output[512]; //_declspec(align(16)) unsigned char output[512],decoded_output[16], *inPtr, *outPtr; - short channel_output[512]; unsigned char output[512],decoded_output[16]; unsigned int i,crc,ret; - test[0] = 7; test[1] = 0xa5; test[2] = 0x11; test[3] = 0x92; test[4] = 0xfe; - crc = crc24a(test, 40)>>8; - - *(unsigned int*)(&test[5]) = crc; - + *(unsigned int *)(&test[5]) = crc; printf("crc24 = %x\n",crc); threegpplte_turbo_encoder(test, //input 8, //input length bytes @@ -2646,20 +2450,15 @@ int test_logmap8() 0, // filler bits 0); // decoder instance - for (i=0; i<8; i++) - printf("output %d => %x (input %x)\n",i,decoded_output[i],test[i]); + printf("output %u => %x (input %x)\n",i,decoded_output[i],test[i]); } -int main() -{ - - +int main() { test_logmap8(); - return(0); } diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c index a25ba46ff8e..50fae438b76 100644 --- a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c +++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_16bit.c @@ -41,53 +41,53 @@ #include "PHY/sse_intrin.h" #ifndef TEST_DEBUG -#include "PHY/impl_defs_top.h" -#include "PHY/defs_common.h" -#include "PHY/CODING/coding_defs.h" -#include 
"PHY/CODING/lte_interleaver_inline.h" + #include "PHY/impl_defs_top.h" + #include "PHY/defs_common.h" + #include "PHY/CODING/coding_defs.h" + #include "PHY/CODING/lte_interleaver_inline.h" #else -#include "defs.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> + #include "defs.h" + #include <stdio.h> + #include <stdlib.h> + #include <string.h> #endif #ifdef MEX -#include "mex.h" + #include "mex.h" #endif //#define DEBUG_LOGMAP #ifdef DEBUG_LOGMAP -#define print_shorts(s,x) fprintf(fdsse4,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7]) + #define print_shorts(s,x) fprintf(fdsse4,"%s %d,%d,%d,%d,%d,%d,%d,%d\n",s,(x)[0],(x)[1],(x)[2],(x)[3],(x)[4],(x)[5],(x)[6],(x)[7]) #endif #undef __AVX2__ #ifdef DEBUG_LOGMAP -FILE *fdsse4; + FILE *fdsse4; #endif typedef int16_t llr_t; // internal decoder LLR data is 16-bit fixed typedef int16_t channel_t; #define MAX 256 -void log_map16(llr_t* systematic,channel_t* y_parity, llr_t* m11, llr_t* m10, llr_t *alpha, llr_t *beta, llr_t* ext,unsigned short frame_length,unsigned char term_flag,unsigned char F, +void log_map16(llr_t *systematic,channel_t *y_parity, llr_t *m11, llr_t *m10, llr_t *alpha, llr_t *beta, llr_t *ext,unsigned short frame_length,unsigned char term_flag,unsigned char F, int offset8_flag,time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats); -void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic, channel_t* y_parity, unsigned short frame_length,unsigned char term_flag); -void compute_alpha16(llr_t*alpha,llr_t *beta, llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F); -void compute_beta16(llr_t*alpha, llr_t* beta,llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F,int offset8_flag); -void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m11,llr_t* m10,llr_t* extrinsic, llr_t* ap, unsigned short frame_length); +void compute_gamma16(llr_t *m11,llr_t *m10,llr_t *systematic, channel_t 
*y_parity, unsigned short frame_length,unsigned char term_flag); +void compute_alpha16(llr_t *alpha,llr_t *beta, llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F); +void compute_beta16(llr_t *alpha, llr_t *beta,llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F,int offset8_flag); +void compute_ext16(llr_t *alpha,llr_t *beta,llr_t *m11,llr_t *m10,llr_t *extrinsic, llr_t *ap, unsigned short frame_length); -void log_map16(llr_t* systematic, - channel_t* y_parity, - llr_t* m11, - llr_t* m10, +void log_map16(llr_t *systematic, + channel_t *y_parity, + llr_t *m11, + llr_t *m10, llr_t *alpha, llr_t *beta, - llr_t* ext, + llr_t *ext, unsigned short frame_length, unsigned char term_flag, unsigned char F, @@ -95,13 +95,10 @@ void log_map16(llr_t* systematic, time_stats_t *alpha_stats, time_stats_t *beta_stats, time_stats_t *gamma_stats, - time_stats_t *ext_stats) -{ - + time_stats_t *ext_stats) { #ifdef DEBUG_LOGMAP fprintf(fdsse4,"log_map, frame_length %d\n",frame_length); #endif - start_meas(gamma_stats) ; compute_gamma16(m11,m10,systematic,y_parity,frame_length,term_flag) ; stop_meas(gamma_stats); @@ -114,13 +111,10 @@ void log_map16(llr_t* systematic, start_meas(ext_stats) ; compute_ext16(alpha,beta,m11,m10,ext,systematic,frame_length) ; stop_meas(ext_stats); - - } -void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, - unsigned short frame_length,unsigned char term_flag) -{ +void compute_gamma16(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity, + unsigned short frame_length,unsigned char term_flag) { int k,K1; #if defined(__x86_64__)||defined(__i386__) __m128i *systematic128 = (__m128i *)systematic; @@ -133,18 +127,18 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity int16x8_t *m10_128 = (int16x8_t *)m10; int16x8_t *m11_128 = (int16x8_t *)m11; #endif - #ifdef DEBUG_LOGMAP fprintf(fdsse4,"compute_gamma (sse_16bit), %p,%p,%p,%p,framelength 
%d\n",m11,m10,systematic,y_parity,frame_length); #endif - #ifndef __AVX2__ K1=frame_length>>3; #else + if ((frame_length&15) > 0) K1=(frame_length+1)>>4; else K1=frame_length>>4; + #endif for (k=0; k<K1; k++) { @@ -153,21 +147,20 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity m11_128[k] = _mm_srai_epi16(_mm_adds_epi16(systematic128[k],y_parity128[k]),1); m10_128[k] = _mm_srai_epi16(_mm_subs_epi16(systematic128[k],y_parity128[k]),1); #else - ((__m256i*)m11_128)[k] = _mm256_srai_epi16(_mm256_adds_epi16(((__m256i*)systematic128)[k],((__m256i*)y_parity128)[k]),1); + ((__m256i *)m11_128)[k] = _mm256_srai_epi16(_mm256_adds_epi16(((__m256i *)systematic128)[k],((__m256i *)y_parity128)[k]),1); // ((__m256i*)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i*)y_parity128)[k],((__m256i*)systematic128)[k]),1); - ((__m256i*)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i*)systematic128)[k],((__m256i*)y_parity128)[k]),1); + ((__m256i *)m10_128)[k] = _mm256_srai_epi16(_mm256_subs_epi16(((__m256i *)systematic128)[k],((__m256i *)y_parity128)[k]),1); #endif #elif defined(__arm__) m11_128[k] = vhaddq_s16(systematic128[k],y_parity128[k]); m10_128[k] = vhsubq_s16(systematic128[k],y_parity128[k]); #endif - #ifdef DEBUG_LOGMAP fprintf(fdsse4,"Loop index k %d\n", k); - print_shorts("sys",(int16_t*)&systematic128[k]); - print_shorts("yp",(int16_t*)&y_parity128[k]); - print_shorts("m11",(int16_t*)&m11_128[k]); - print_shorts("m10",(int16_t*)&m10_128[k]); + print_shorts("sys",(int16_t *)&systematic128[k]); + print_shorts("yp",(int16_t *)&y_parity128[k]); + print_shorts("m11",(int16_t *)&m11_128[k]); + print_shorts("m10",(int16_t *)&m10_128[k]); #endif } @@ -185,20 +178,18 @@ void compute_gamma16(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity m11_128[k] = vhaddq_s16(systematic128[k+term_flag],y_parity128[k]); m10_128[k] = vhsubq_s16(systematic128[k+term_flag],y_parity128[k]); #endif - #ifdef DEBUG_LOGMAP 
-fprintf(fdsse4,"Loop index k %d (term flag %d)\n", k,term_flag); -print_shorts("sys",(int16_t*)&systematic128[k]); - print_shorts("yp",(int16_t*)&y_parity128[k]); - print_shorts("m11",(int16_t*)&m11_128[k]); - print_shorts("m10",(int16_t*)&m10_128[k]); + fprintf(fdsse4,"Loop index k %d (term flag %d)\n", k,term_flag); + print_shorts("sys",(int16_t *)&systematic128[k]); + print_shorts("yp",(int16_t *)&y_parity128[k]); + print_shorts("m11",(int16_t *)&m11_128[k]); + print_shorts("m10",(int16_t *)&m10_128[k]); #endif } #define L 40 -void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned short frame_length,unsigned char F) -{ +void compute_alpha16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F) { int k,l,l2,K1,rerun_flag=0; #if defined(__x86_64__) || defined(__i386__) __m128i *alpha128=(__m128i *)alpha,*alpha_ptr,*m11p,*m10p; @@ -215,7 +206,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s __m256i m11m10_256; __m256i alpha_max; #endif - #elif defined(__arm__) int16x8_t *alpha128=(int16x8_t *)alpha,*alpha_ptr; int16x8_t a0,a1,a2,a3,a4,a5,a6,a7,*m11p,*m10p; @@ -228,6 +218,7 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s #ifdef DEBUG_LOGMAP fprintf(fdsse4,"compute_alpha (sse_16bit)\n"); #endif + for (l=K1;; l=l2,rerun_flag=1) { #if defined(__x86_64__) || defined(__i386__) alpha128 = (__m128i *)alpha; @@ -259,14 +250,14 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s #endif #ifdef DEBUG_LOGMAP fprintf(fdsse4,"Initial alpha\n"); - print_shorts("a0",(int16_t*)&alpha128[0]); - print_shorts("a1",(int16_t*)&alpha128[1]); - print_shorts("a2",(int16_t*)&alpha128[2]); - print_shorts("a3",(int16_t*)&alpha128[3]); - print_shorts("a4",(int16_t*)&alpha128[4]); - print_shorts("a5",(int16_t*)&alpha128[5]); - print_shorts("a6",(int16_t*)&alpha128[6]); - print_shorts("a7",(int16_t*)&alpha128[7]); + 
print_shorts("a0",(int16_t *)&alpha128[0]); + print_shorts("a1",(int16_t *)&alpha128[1]); + print_shorts("a2",(int16_t *)&alpha128[2]); + print_shorts("a3",(int16_t *)&alpha128[3]); + print_shorts("a4",(int16_t *)&alpha128[4]); + print_shorts("a5",(int16_t *)&alpha128[5]); + print_shorts("a6",(int16_t *)&alpha128[6]); + print_shorts("a7",(int16_t *)&alpha128[7]); #endif } else { //set initial alpha in columns 1-7 from final alpha from last run in columns 0-6 @@ -280,14 +271,22 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s alpha128[6] = _mm_slli_si128(alpha128[6+frame_length],2); alpha128[7] = _mm_slli_si128(alpha128[7+frame_length],2); #elif defined(__arm__) - alpha128[0] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[frame_length],16); alpha128[0] = vsetq_lane_s16(alpha[8],alpha128[0],3); - alpha128[1] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[1+frame_length],16); alpha128[1] = vsetq_lane_s16(alpha[24],alpha128[0],3); - alpha128[2] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[2+frame_length],16); alpha128[2] = vsetq_lane_s16(alpha[40],alpha128[0],3); - alpha128[3] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[3+frame_length],16); alpha128[3] = vsetq_lane_s16(alpha[56],alpha128[0],3); - alpha128[4] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[4+frame_length],16); alpha128[4] = vsetq_lane_s16(alpha[72],alpha128[0],3); - alpha128[5] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[5+frame_length],16); alpha128[5] = vsetq_lane_s16(alpha[88],alpha128[0],3); - alpha128[6] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[6+frame_length],16); alpha128[6] = vsetq_lane_s16(alpha[104],alpha128[0],3); - alpha128[7] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[7+frame_length],16); alpha128[7] = vsetq_lane_s16(alpha[120],alpha128[0],3); + alpha128[0] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[frame_length],16); + alpha128[0] = vsetq_lane_s16(alpha[8],alpha128[0],3); + alpha128[1] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[1+frame_length],16); + 
alpha128[1] = vsetq_lane_s16(alpha[24],alpha128[0],3); + alpha128[2] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[2+frame_length],16); + alpha128[2] = vsetq_lane_s16(alpha[40],alpha128[0],3); + alpha128[3] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[3+frame_length],16); + alpha128[3] = vsetq_lane_s16(alpha[56],alpha128[0],3); + alpha128[4] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[4+frame_length],16); + alpha128[4] = vsetq_lane_s16(alpha[72],alpha128[0],3); + alpha128[5] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[5+frame_length],16); + alpha128[5] = vsetq_lane_s16(alpha[88],alpha128[0],3); + alpha128[6] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[6+frame_length],16); + alpha128[6] = vsetq_lane_s16(alpha[104],alpha128[0],3); + alpha128[7] = (int16x8_t)vshlq_n_s64((int64x2_t)alpha128[7+frame_length],16); + alpha128[7] = vsetq_lane_s16(alpha[120],alpha128[0],3); #endif // set initial alpha in column 0 to (0,-MAX/2,...,-MAX/2) alpha[8] = -MAX/2; @@ -299,31 +298,30 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s alpha[56] = -MAX/2; #ifdef DEBUG_LOGMAP fprintf(fdsse4,"Second run\n"); - print_shorts("a0",(int16_t*)&alpha128[0]); - print_shorts("a1",(int16_t*)&alpha128[1]); - print_shorts("a2",(int16_t*)&alpha128[2]); - print_shorts("a3",(int16_t*)&alpha128[3]); - print_shorts("a4",(int16_t*)&alpha128[4]); - print_shorts("a5",(int16_t*)&alpha128[5]); - print_shorts("a6",(int16_t*)&alpha128[6]); - print_shorts("a7",(int16_t*)&alpha128[7]); + print_shorts("a0",(int16_t *)&alpha128[0]); + print_shorts("a1",(int16_t *)&alpha128[1]); + print_shorts("a2",(int16_t *)&alpha128[2]); + print_shorts("a3",(int16_t *)&alpha128[3]); + print_shorts("a4",(int16_t *)&alpha128[4]); + print_shorts("a5",(int16_t *)&alpha128[5]); + print_shorts("a6",(int16_t *)&alpha128[6]); + print_shorts("a7",(int16_t *)&alpha128[7]); #endif - } alpha_ptr = &alpha128[0]; //#ifdef __AVX2__ #if defined(__x86_64__) || defined(__i386__) - m11p = (__m128i*)m_11; - m10p = 
(__m128i*)m_10; + m11p = (__m128i *)m_11; + m10p = (__m128i *)m_10; #elif defined(__arm__) - m11p = (int16x8_t*)m_11; - m10p = (int16x8_t*)m_10; + m11p = (int16x8_t *)m_11; + m10p = (int16x8_t *)m_10; #endif + for (k=0; k<l; k++) { - #if defined(__x86_64__) || defined(__i386__) //#ifndef __AVX2__ #if 1 @@ -331,7 +329,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s a3=_mm_load_si128(&alpha_ptr[3]); a5=_mm_load_si128(&alpha_ptr[5]); a7=_mm_load_si128(&alpha_ptr[7]); - m_b0 = _mm_adds_epi16(a1,*m11p); // m11 m_b4 = _mm_subs_epi16(a1,*m11p); // m00=-m11 m_b1 = _mm_subs_epi16(a3,*m10p); // m01=-m10 @@ -340,12 +337,10 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s m_b6 = _mm_subs_epi16(a5,*m10p); // m01=-m10 m_b3 = _mm_subs_epi16(a7,*m11p); // m00=-m11 m_b7 = _mm_adds_epi16(a7,*m11p); // m11 - a0=_mm_load_si128(&alpha_ptr[0]); a2=_mm_load_si128(&alpha_ptr[2]); a4=_mm_load_si128(&alpha_ptr[4]); a6=_mm_load_si128(&alpha_ptr[6]); - new0 = _mm_subs_epi16(a0,*m11p); // m00=-m11 new4 = _mm_adds_epi16(a0,*m11p); // m11 new1 = _mm_adds_epi16(a2,*m10p); // m10 @@ -354,7 +349,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s new6 = _mm_adds_epi16(a4,*m10p); // m10 new3 = _mm_adds_epi16(a6,*m11p); // m11 new7 = _mm_subs_epi16(a6,*m11p); // m00=-m11 - a0 = _mm_max_epi16(m_b0,new0); a1 = _mm_max_epi16(m_b1,new1); a2 = _mm_max_epi16(m_b2,new2); @@ -363,7 +357,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s a5 = _mm_max_epi16(m_b5,new5); a6 = _mm_max_epi16(m_b6,new6); a7 = _mm_max_epi16(m_b7,new7); - alpha_max = _mm_max_epi16(a0,a1); alpha_max = _mm_max_epi16(alpha_max,a2); alpha_max = _mm_max_epi16(alpha_max,a3); @@ -378,29 +371,22 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s a75=_mm256_load_si256(&alpha_ptr256[3]); m11m10_256 = _mm256_insertf128_si256(m11m10_256,*m11p,0); m11m10_256 = 
_mm256_insertf128_si256(m11m10_256,*m10p,1); - - m_b01 = _mm256_adds_epi16(a13,m11m10_256); //negative m10 m_b23 = _mm256_subs_epi16(a75,m11m10_256); //negative m10 m_b45 = _mm256_subs_epi16(a13,m11m10_256); //negative m10 m_b67 = _mm256_adds_epi16(a75,m11m10_256); //negative m10 - new01 = _mm256_subs_epi16(a02,m11m10_256); //negative m10 new23 = _mm256_adds_epi16(a64,m11m10_256); //negative m10 new45 = _mm256_adds_epi16(a02,m11m10_256); //negative m10 new67 = _mm256_subs_epi16(a64,m11m10_256); //negative m10 - a01 = _mm256_max_epi16(m_b01,new01); a23 = _mm256_max_epi16(m_b23,new23); a45 = _mm256_max_epi16(m_b45,new45); a67 = _mm256_max_epi16(m_b67,new67); - alpha_max = _mm256_max_epi16(a01,a23); alpha_max = _mm256_max_epi16(alpha_max,a45); alpha_max = _mm256_max_epi16(alpha_max,a67); alpha_max = _mm256_max_epi16(alpha_max,_mm256_permutevar8x32_epi32(alpha_max,_mm256_set_epi32(3,2,1,0,7,6,5,4))); - - #endif #elif defined(__arm__) m_b0 = vqaddq_s16(alpha_ptr[1],*m11p); // m11 @@ -411,7 +397,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s m_b6 = vqsubq_s16(alpha_ptr[5],*m10p); // m01=-m10 m_b3 = vqsubq_s16(alpha_ptr[7],*m11p); // m00=-m11 m_b7 = vqaddq_s16(alpha_ptr[7],*m11p); // m11 - new0 = vqsubq_s16(alpha_ptr[0],*m11p); // m00=-m11 new4 = vqaddq_s16(alpha_ptr[0],*m11p); // m11 new1 = vqaddq_s16(alpha_ptr[2],*m10p); // m10 @@ -428,7 +413,6 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s a5 = vmaxq_s16(m_b5,new5); a6 = vmaxq_s16(m_b6,new6); a7 = vmaxq_s16(m_b7,new7); - // compute and subtract maxima alpha_max = vmaxq_s16(a0,a1); alpha_max = vmaxq_s16(alpha_max,a2); @@ -437,9 +421,7 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s alpha_max = vmaxq_s16(alpha_max,a5); alpha_max = vmaxq_s16(alpha_max,a6); alpha_max = vmaxq_s16(alpha_max,a7); - #endif - alpha_ptr+=8; //#ifdef __AVX2__ m11p++; @@ -456,12 +438,10 @@ void compute_alpha16(llr_t* alpha,llr_t* 
beta,llr_t* m_11,llr_t* m_10,unsigned s alpha_ptr[6] = _mm_subs_epi16(a6,alpha_max); alpha_ptr[7] = _mm_subs_epi16(a7,alpha_max); #else - a01 = _mm256_subs_epi16(a01,alpha_max); a23 = _mm256_subs_epi16(a23,alpha_max); a45 = _mm256_subs_epi16(a45,alpha_max); a67 = _mm256_subs_epi16(a67,alpha_max); - alpha_ptr256[0] = _mm256_permute2x128_si256(a01,a23,0x20); //a02 alpha_ptr256[1] = _mm256_permute2x128_si256(a01,a23,0x13); //a13 alpha_ptr256[2] = _mm256_permute2x128_si256(a45,a67,0x02); //a64 @@ -477,49 +457,44 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s alpha_ptr[6] = vqsubq_s16(a6,alpha_max); alpha_ptr[7] = vqsubq_s16(a7,alpha_max); #endif - #ifdef DEBUG_LOGMAP fprintf(fdsse4,"Loop index %d\n",k); - print_shorts("mb0",(int16_t*)&m_b0); - print_shorts("mb1",(int16_t*)&m_b1); - print_shorts("mb2",(int16_t*)&m_b2); - print_shorts("mb3",(int16_t*)&m_b3); - print_shorts("mb4",(int16_t*)&m_b4); - print_shorts("mb5",(int16_t*)&m_b5); - print_shorts("mb6",(int16_t*)&m_b6); - print_shorts("mb7",(int16_t*)&m_b7); - + print_shorts("mb0",(int16_t *)&m_b0); + print_shorts("mb1",(int16_t *)&m_b1); + print_shorts("mb2",(int16_t *)&m_b2); + print_shorts("mb3",(int16_t *)&m_b3); + print_shorts("mb4",(int16_t *)&m_b4); + print_shorts("mb5",(int16_t *)&m_b5); + print_shorts("mb6",(int16_t *)&m_b6); + print_shorts("mb7",(int16_t *)&m_b7); fprintf(fdsse4,"Loop index %d, new\n",k); - print_shorts("new0",(int16_t*)&new0); - print_shorts("new1",(int16_t*)&new1); - print_shorts("new2",(int16_t*)&new2); - print_shorts("new3",(int16_t*)&new3); - print_shorts("new4",(int16_t*)&new4); - print_shorts("new5",(int16_t*)&new5); - print_shorts("new6",(int16_t*)&new6); - print_shorts("new7",(int16_t*)&new7); - + print_shorts("new0",(int16_t *)&new0); + print_shorts("new1",(int16_t *)&new1); + print_shorts("new2",(int16_t *)&new2); + print_shorts("new3",(int16_t *)&new3); + print_shorts("new4",(int16_t *)&new4); + print_shorts("new5",(int16_t *)&new5); + 
print_shorts("new6",(int16_t *)&new6); + print_shorts("new7",(int16_t *)&new7); fprintf(fdsse4,"Loop index %d, after max\n",k); - print_shorts("a0",(int16_t*)&a0); - print_shorts("a1",(int16_t*)&a1); - print_shorts("a2",(int16_t*)&a2); - print_shorts("a3",(int16_t*)&a3); - print_shorts("a4",(int16_t*)&a4); - print_shorts("a5",(int16_t*)&a5); - print_shorts("a6",(int16_t*)&a6); - print_shorts("a7",(int16_t*)&a7); - + print_shorts("a0",(int16_t *)&a0); + print_shorts("a1",(int16_t *)&a1); + print_shorts("a2",(int16_t *)&a2); + print_shorts("a3",(int16_t *)&a3); + print_shorts("a4",(int16_t *)&a4); + print_shorts("a5",(int16_t *)&a5); + print_shorts("a6",(int16_t *)&a6); + print_shorts("a7",(int16_t *)&a7); fprintf(fdsse4,"Loop index %d\n",k); - print_shorts("a0",(int16_t*)&alpha_ptr[0]); - print_shorts("a1",(int16_t*)&alpha_ptr[1]); - print_shorts("a2",(int16_t*)&alpha_ptr[2]); - print_shorts("a3",(int16_t*)&alpha_ptr[3]); - print_shorts("a4",(int16_t*)&alpha_ptr[4]); - print_shorts("a5",(int16_t*)&alpha_ptr[5]); - print_shorts("a6",(int16_t*)&alpha_ptr[6]); - print_shorts("a7",(int16_t*)&alpha_ptr[7]); + print_shorts("a0",(int16_t *)&alpha_ptr[0]); + print_shorts("a1",(int16_t *)&alpha_ptr[1]); + print_shorts("a2",(int16_t *)&alpha_ptr[2]); + print_shorts("a3",(int16_t *)&alpha_ptr[3]); + print_shorts("a4",(int16_t *)&alpha_ptr[4]); + print_shorts("a5",(int16_t *)&alpha_ptr[5]); + print_shorts("a6",(int16_t *)&alpha_ptr[6]); + print_shorts("a7",(int16_t *)&alpha_ptr[7]); #endif - } if (rerun_flag==1) @@ -528,37 +503,28 @@ void compute_alpha16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned s } -void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned short frame_length,unsigned char F,int offset8_flag) -{ - +void compute_beta16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F,int offset8_flag) { int k,rerun_flag=0; #if defined(__x86_64__) || defined(__i386__) __m128i m11_128,m10_128; __m128i 
m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; __m128i new0,new1,new2,new3,new4,new5,new6,new7; - __m128i *beta128,*alpha128,*beta_ptr; __m128i beta_max; #elif defined(__arm__) int16x8_t m11_128,m10_128; int16x8_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; int16x8_t new0,new1,new2,new3,new4,new5,new6,new7; - int16x8_t *beta128,*alpha128,*beta_ptr; int16x8_t beta_max; #endif - int16_t m11,m10,beta0_16,beta1_16,beta2_16,beta3_16,beta4_16,beta5_16,beta6_16,beta7_16,beta0_2,beta1_2,beta2_2,beta3_2,beta_m; llr_t beta0,beta1; - #ifdef DEBUG_LOGMAP fprintf(fdsse4,"compute_beta, %p,%p,%p,%p,framelength %d,F %d\n", - beta,m_11,m_10,alpha,frame_length,F); + beta,m_11,m_10,alpha,frame_length,F); #endif - - // termination for beta initialization - // fprintf(fdsse4,"beta init: offset8 %d\n",offset8_flag); m11=(int16_t)m_11[2+frame_length]; //#ifndef __AVX2__ @@ -570,16 +536,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh #ifdef DEBUG_LOGMAP fprintf(fdsse4,"m11,m10 %d,%d\n",m11,m10); #endif - beta0 = -m11;//M0T_TERM; beta1 = m11;//M1T_TERM; m11=(int16_t)m_11[1+frame_length]; m10=(int16_t)m_10[1+frame_length]; - #ifdef DEBUG_LOGMAP fprintf(fdsse4,"m11,m10 %d,%d\n",m11,m10); #endif - beta0_2 = beta0-m11;//+M0T_TERM; beta1_2 = beta0+m11;//+M1T_TERM; beta2_2 = beta1+m10;//M2T_TERM; @@ -597,8 +560,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta5_16 = beta2_2+m10;//+M5T_TERM; beta6_16 = beta3_2+m11;//+M6T_TERM; beta7_16 = beta3_2-m11;//+M7T_TERM; - - beta_m = (beta0_16>beta1_16) ? beta0_16 : beta1_16; beta_m = (beta_m>beta2_16) ? beta_m : beta2_16; beta_m = (beta_m>beta3_16) ? beta_m : beta3_16; @@ -606,8 +567,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_m = (beta_m>beta5_16) ? beta_m : beta5_16; beta_m = (beta_m>beta6_16) ? beta_m : beta6_16; beta_m = (beta_m>beta7_16) ? 
beta_m : beta7_16; - - beta0_16=beta0_16-beta_m; beta1_16=beta1_16-beta_m; beta2_16=beta2_16-beta_m; @@ -619,12 +578,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh for (rerun_flag=0;; rerun_flag=1) { #if defined(__x86_64__) || defined(__i386__) - beta_ptr = (__m128i*)&beta[frame_length<<3]; - alpha128 = (__m128i*)&alpha[0]; + beta_ptr = (__m128i *)&beta[frame_length<<3]; + alpha128 = (__m128i *)&alpha[0]; #elif defined(__arm__) - beta_ptr = (int16x8_t*)&beta[frame_length<<3]; - alpha128 = (int16x8_t*)&alpha[0]; + beta_ptr = (int16x8_t *)&beta[frame_length<<3]; + alpha128 = (int16x8_t *)&alpha[0]; #endif + if (rerun_flag == 0) { beta_ptr[0] = alpha128[(frame_length)]; beta_ptr[1] = alpha128[1+(frame_length)]; @@ -636,18 +596,18 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_ptr[7] = alpha128[7+(frame_length)]; #ifdef DEBUG_LOGMAP fprintf(fdsse4,"beta init \n"); - print_shorts("b0",(int16_t*)&beta_ptr[0]); - print_shorts("b1",(int16_t*)&beta_ptr[1]); - print_shorts("b2",(int16_t*)&beta_ptr[2]); - print_shorts("b3",(int16_t*)&beta_ptr[3]); - print_shorts("b4",(int16_t*)&beta_ptr[4]); - print_shorts("b5",(int16_t*)&beta_ptr[5]); - print_shorts("b6",(int16_t*)&beta_ptr[6]); - print_shorts("b7",(int16_t*)&beta_ptr[7]); + print_shorts("b0",(int16_t *)&beta_ptr[0]); + print_shorts("b1",(int16_t *)&beta_ptr[1]); + print_shorts("b2",(int16_t *)&beta_ptr[2]); + print_shorts("b3",(int16_t *)&beta_ptr[3]); + print_shorts("b4",(int16_t *)&beta_ptr[4]); + print_shorts("b5",(int16_t *)&beta_ptr[5]); + print_shorts("b6",(int16_t *)&beta_ptr[6]); + print_shorts("b7",(int16_t *)&beta_ptr[7]); #endif } else { #if defined(__x86_64__) || defined(__i386__) - beta128 = (__m128i*)&beta[0]; + beta128 = (__m128i *)&beta[0]; beta_ptr[0] = _mm_srli_si128(beta128[0],2); beta_ptr[1] = _mm_srli_si128(beta128[1],2); beta_ptr[2] = _mm_srli_si128(beta128[2],2); @@ -657,31 +617,38 @@ void compute_beta16(llr_t* 
alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_ptr[6] = _mm_srli_si128(beta128[6],2); beta_ptr[7] = _mm_srli_si128(beta128[7],2); #elif defined(__arm__) - beta128 = (int16x8_t*)&beta[0]; - beta_ptr = (int16x8_t*)&beta[frame_length<<3]; - beta_ptr[0] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[0],16); beta_ptr[0] = vsetq_lane_s16(beta[3],beta_ptr[0],4); - beta_ptr[1] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[1],16); beta_ptr[1] = vsetq_lane_s16(beta[11],beta_ptr[1],4); - beta_ptr[2] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[2],16); beta_ptr[2] = vsetq_lane_s16(beta[19],beta_ptr[2],4); - beta_ptr[3] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[3],16); beta_ptr[3] = vsetq_lane_s16(beta[27],beta_ptr[3],4); - beta_ptr[4] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[4],16); beta_ptr[4] = vsetq_lane_s16(beta[35],beta_ptr[4],4); - beta_ptr[5] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[5],16); beta_ptr[5] = vsetq_lane_s16(beta[43],beta_ptr[5],4); - beta_ptr[6] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[6],16); beta_ptr[6] = vsetq_lane_s16(beta[51],beta_ptr[6],4); - beta_ptr[7] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[7],16); beta_ptr[7] = vsetq_lane_s16(beta[59],beta_ptr[7],4); + beta128 = (int16x8_t *)&beta[0]; + beta_ptr = (int16x8_t *)&beta[frame_length<<3]; + beta_ptr[0] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[0],16); + beta_ptr[0] = vsetq_lane_s16(beta[3],beta_ptr[0],4); + beta_ptr[1] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[1],16); + beta_ptr[1] = vsetq_lane_s16(beta[11],beta_ptr[1],4); + beta_ptr[2] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[2],16); + beta_ptr[2] = vsetq_lane_s16(beta[19],beta_ptr[2],4); + beta_ptr[3] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[3],16); + beta_ptr[3] = vsetq_lane_s16(beta[27],beta_ptr[3],4); + beta_ptr[4] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[4],16); + beta_ptr[4] = vsetq_lane_s16(beta[35],beta_ptr[4],4); + beta_ptr[5] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[5],16); + beta_ptr[5] = 
vsetq_lane_s16(beta[43],beta_ptr[5],4); + beta_ptr[6] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[6],16); + beta_ptr[6] = vsetq_lane_s16(beta[51],beta_ptr[6],4); + beta_ptr[7] = (int16x8_t)vshrq_n_s64((int64x2_t)beta128[7],16); + beta_ptr[7] = vsetq_lane_s16(beta[59],beta_ptr[7],4); #endif #ifdef DEBUG_LOGMAP fprintf(fdsse4,"beta init (second run) \n"); - print_shorts("b0",(int16_t*)&beta_ptr[0]); - print_shorts("b1",(int16_t*)&beta_ptr[1]); - print_shorts("b2",(int16_t*)&beta_ptr[2]); - print_shorts("b3",(int16_t*)&beta_ptr[3]); - print_shorts("b4",(int16_t*)&beta_ptr[4]); - print_shorts("b5",(int16_t*)&beta_ptr[5]); - print_shorts("b6",(int16_t*)&beta_ptr[6]); - print_shorts("b7",(int16_t*)&beta_ptr[7]); + print_shorts("b0",(int16_t *)&beta_ptr[0]); + print_shorts("b1",(int16_t *)&beta_ptr[1]); + print_shorts("b2",(int16_t *)&beta_ptr[2]); + print_shorts("b3",(int16_t *)&beta_ptr[3]); + print_shorts("b4",(int16_t *)&beta_ptr[4]); + print_shorts("b5",(int16_t *)&beta_ptr[5]); + print_shorts("b6",(int16_t *)&beta_ptr[6]); + print_shorts("b7",(int16_t *)&beta_ptr[7]); #endif } - #if defined(__x86_64__) || defined(__i386__) beta_ptr[0] = _mm_insert_epi16(beta_ptr[0],beta0_16,7); beta_ptr[1] = _mm_insert_epi16(beta_ptr[1],beta1_16,7); @@ -701,26 +668,23 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_ptr[6] = vsetq_lane_s16(beta6_16,beta_ptr[6],7); beta_ptr[7] = vsetq_lane_s16(beta7_16,beta_ptr[7],7); #endif - #ifdef DEBUG_LOGMAP - fprintf(fdsse4,"beta init (after insert) \n"); - print_shorts("b0",(int16_t*)&beta_ptr[0]); - print_shorts("b1",(int16_t*)&beta_ptr[1]); - print_shorts("b2",(int16_t*)&beta_ptr[2]); - print_shorts("b3",(int16_t*)&beta_ptr[3]); - print_shorts("b4",(int16_t*)&beta_ptr[4]); - print_shorts("b5",(int16_t*)&beta_ptr[5]); - print_shorts("b6",(int16_t*)&beta_ptr[6]); - print_shorts("b7",(int16_t*)&beta_ptr[7]); + fprintf(fdsse4,"beta init (after insert) \n"); + print_shorts("b0",(int16_t *)&beta_ptr[0]); + 
print_shorts("b1",(int16_t *)&beta_ptr[1]); + print_shorts("b2",(int16_t *)&beta_ptr[2]); + print_shorts("b3",(int16_t *)&beta_ptr[3]); + print_shorts("b4",(int16_t *)&beta_ptr[4]); + print_shorts("b5",(int16_t *)&beta_ptr[5]); + print_shorts("b6",(int16_t *)&beta_ptr[6]); + print_shorts("b7",(int16_t *)&beta_ptr[7]); #endif int loopval=((rerun_flag==0)?0:((frame_length-L)>>3)); for (k=(frame_length>>3)-1; k>=loopval; k--) { #if defined(__x86_64__) || defined(__i386__) - m11_128=((__m128i*)m_11)[k]; - m10_128=((__m128i*)m_10)[k]; - - + m11_128=((__m128i *)m_11)[k]; + m10_128=((__m128i *)m_10)[k]; //#ifndef __AVX2__ #if 1 m_b0 = _mm_adds_epi16(beta_ptr[4],m11_128); //m11 @@ -731,8 +695,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh m_b5 = _mm_subs_epi16(beta_ptr[6],m10_128); //m01 m_b6 = _mm_subs_epi16(beta_ptr[7],m11_128); //m00 m_b7 = _mm_adds_epi16(beta_ptr[7],m11_128); //m11 - - new0 = _mm_subs_epi16(beta_ptr[0],m11_128); //m00 new1 = _mm_adds_epi16(beta_ptr[0],m11_128); //m11 new2 = _mm_adds_epi16(beta_ptr[1],m10_128); //m10 @@ -741,16 +703,13 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh new5 = _mm_adds_epi16(beta_ptr[2],m10_128); //m10 new6 = _mm_adds_epi16(beta_ptr[3],m11_128); //m11 new7 = _mm_subs_epi16(beta_ptr[3],m11_128); //m00 - #else - b01=_mm256_load_si256(&((_m256i*)beta_ptr)[0]); - b23=_mm256_load_si256(&((_m256i*)beta_ptr)[1]); - b45=_mm256_load_si256(&((_m256i*)beta_ptr)[2]); - b67=_mm256_load_si256(&((_m256i*)beta_ptr)[3]); + b01=_mm256_load_si256(&((_m256i *)beta_ptr)[0]); + b23=_mm256_load_si256(&((_m256i *)beta_ptr)[1]); + b45=_mm256_load_si256(&((_m256i *)beta_ptr)[2]); + b67=_mm256_load_si256(&((_m256i *)beta_ptr)[3]); m11m10_256 = _mm256_insertf128_si256(m11m10_256,m11_128,0); m11m10_256 = _mm256_insertf128_si256(m11m10_256,m10_128,1); - - m_b02 = _mm256_adds_epi16(b45,m11m10_256); //negative m10 m_b13 = _mm256_subs_epi16(b45,m11m10_256); //negative m10 m_b64 = 
_mm256_subs_epi16(b67,m11m10_256); //negative m10 @@ -760,9 +719,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh new64 = _mm256_adds_epi16(b23,m11m10_256); //negative m10 new75 = _mm256_subs_epi16(b24,m11m10_256); //negative m10 #endif - beta_ptr-=8; - //#ifndef __AVX2__ #if 1 beta_ptr[0] = _mm_max_epi16(m_b0,new0); @@ -773,7 +730,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_ptr[5] = _mm_max_epi16(m_b5,new5); beta_ptr[6] = _mm_max_epi16(m_b6,new6); beta_ptr[7] = _mm_max_epi16(m_b7,new7); - beta_max = _mm_max_epi16(beta_ptr[0],beta_ptr[1]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[2]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[3]); @@ -781,7 +737,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_max = _mm_max_epi16(beta_max ,beta_ptr[5]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[6]); beta_max = _mm_max_epi16(beta_max ,beta_ptr[7]); - beta_ptr[0] = _mm_subs_epi16(beta_ptr[0],beta_max); beta_ptr[1] = _mm_subs_epi16(beta_ptr[1],beta_max); beta_ptr[2] = _mm_subs_epi16(beta_ptr[2],beta_max); @@ -795,26 +750,22 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh b13 = _mm256_max_epi16(m_b13,new13); b64 = _mm256_max_epi16(m_b64,new64); b75 = _mm256_max_epi16(m_b75,new75); - beta_max = _mm256_max_epi16(b02,b13); beta_max = _mm256_max_epi16(beta_max,b64); beta_max = _mm256_max_epi16(beta_max,b75); beta_max = _mm256_max_epi16(beta_max,_mm256_permutevar8x32_epi32(betaa_max,_mm256_set_epi32(3,2,1,0,7,6,5,4))); - b02 = _mm256_subs_epi16(b02,beta_max); b13 = _mm256_subs_epi16(b13,beta_max); b64 = _mm256_subs_epi16(b64,beta_max); b75 = _mm256_subs_epi16(b75,beta_max); - - ((_m256i*)beta_ptr)[0]) = _mm256_permute2x128_si256(b02,b13,0x02); //b01 - ((_m256i*)beta_ptr)[1]) = _mm256_permute2x128_si256(b02,b13,0x31); //b23 - ((_m256i*)beta_ptr)[2]) = _mm256_permute2x128_si256(b64,b75,0x13); //b45 - ((_m256i*)beta_ptr)[3]) = 
_mm256_permute2x128_si256(b64,b75,0x20); //b67 + ((_m256i *)beta_ptr)[0]) = _mm256_permute2x128_si256(b02,b13,0x02); //b01 + ((_m256i *)beta_ptr)[1]) = _mm256_permute2x128_si256(b02,b13,0x31); //b23 + ((_m256i *)beta_ptr)[2]) = _mm256_permute2x128_si256(b64,b75,0x13); //b45 + ((_m256i *)beta_ptr)[3]) = _mm256_permute2x128_si256(b64,b75,0x20); //b67 #endif - #elif defined(__arm__) - m11_128=((int16x8_t*)m_11)[k]; - m10_128=((int16x8_t*)m_10)[k]; + m11_128=((int16x8_t *)m_11)[k]; + m10_128=((int16x8_t *)m_10)[k]; m_b0 = vqaddq_s16(beta_ptr[4],m11_128); //m11 m_b1 = vqsubq_s16(beta_ptr[4],m11_128); //m00 m_b2 = vqsubq_s16(beta_ptr[5],m10_128); //m01 @@ -823,7 +774,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh m_b5 = vqsubq_s16(beta_ptr[6],m10_128); //m01 m_b6 = vqsubq_s16(beta_ptr[7],m11_128); //m00 m_b7 = vqaddq_s16(beta_ptr[7],m11_128); //m11 - new0 = vqsubq_s16(beta_ptr[0],m11_128); //m00 new1 = vqaddq_s16(beta_ptr[0],m11_128); //m11 new2 = vqaddq_s16(beta_ptr[1],m10_128); //m10 @@ -832,9 +782,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh new5 = vqaddq_s16(beta_ptr[2],m10_128); //m10 new6 = vqaddq_s16(beta_ptr[3],m11_128); //m11 new7 = vqsubq_s16(beta_ptr[3],m11_128); //m00 - beta_ptr-=8; - beta_ptr[0] = vmaxq_s16(m_b0,new0); beta_ptr[1] = vmaxq_s16(m_b1,new1); beta_ptr[2] = vmaxq_s16(m_b2,new2); @@ -843,7 +791,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_ptr[5] = vmaxq_s16(m_b5,new5); beta_ptr[6] = vmaxq_s16(m_b6,new6); beta_ptr[7] = vmaxq_s16(m_b7,new7); - beta_max = vmaxq_s16(beta_ptr[0],beta_ptr[1]); beta_max = vmaxq_s16(beta_max ,beta_ptr[2]); beta_max = vmaxq_s16(beta_max ,beta_ptr[3]); @@ -851,7 +798,6 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_max = vmaxq_s16(beta_max ,beta_ptr[5]); beta_max = vmaxq_s16(beta_max ,beta_ptr[6]); beta_max = vmaxq_s16(beta_max ,beta_ptr[7]); - beta_ptr[0] = 
vqsubq_s16(beta_ptr[0],beta_max); beta_ptr[1] = vqsubq_s16(beta_ptr[1],beta_max); beta_ptr[2] = vqsubq_s16(beta_ptr[2],beta_max); @@ -861,20 +807,18 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh beta_ptr[6] = vqsubq_s16(beta_ptr[6],beta_max); beta_ptr[7] = vqsubq_s16(beta_ptr[7],beta_max); #endif - #ifdef DEBUG_LOGMAP fprintf(fdsse4,"Loop index %d, mb\n",k); fprintf(fdsse4,"beta init (after max)\n"); - print_shorts("b0",(int16_t*)&beta_ptr[0]); - print_shorts("b1",(int16_t*)&beta_ptr[1]); - print_shorts("b2",(int16_t*)&beta_ptr[2]); - print_shorts("b3",(int16_t*)&beta_ptr[3]); - print_shorts("b4",(int16_t*)&beta_ptr[4]); - print_shorts("b5",(int16_t*)&beta_ptr[5]); - print_shorts("b6",(int16_t*)&beta_ptr[6]); - print_shorts("b7",(int16_t*)&beta_ptr[7]); + print_shorts("b0",(int16_t *)&beta_ptr[0]); + print_shorts("b1",(int16_t *)&beta_ptr[1]); + print_shorts("b2",(int16_t *)&beta_ptr[2]); + print_shorts("b3",(int16_t *)&beta_ptr[3]); + print_shorts("b4",(int16_t *)&beta_ptr[4]); + print_shorts("b5",(int16_t *)&beta_ptr[5]); + print_shorts("b6",(int16_t *)&beta_ptr[6]); + print_shorts("b7",(int16_t *)&beta_ptr[7]); #endif - } if (rerun_flag==1) @@ -882,8 +826,7 @@ void compute_beta16(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sh } } -void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, llr_t* systematic,unsigned short frame_length) -{ +void compute_ext16(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, llr_t *systematic,unsigned short frame_length) { #if defined(__x86_64__) || defined(__i386__) __m128i *alpha128=(__m128i *)alpha; __m128i *beta128=(__m128i *)beta; @@ -903,28 +846,21 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, int16x8_t m10_1,m10_2,m10_3,m10_4; int16x8_t m11_1,m11_2,m11_3,m11_4; #endif - int k; - // // LLR computation, 8 consequtive bits per loop // - #ifdef DEBUG_LOGMAP fprintf(fdsse4,"compute_ext (sse_16bit), %p, %p, %p, 
%p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length); #endif - alpha_ptr = alpha128; beta_ptr = &beta128[8]; - for (k=0; k<(frame_length>>3); k++) { - #if defined(__x86_64__) || defined(__i386__) - m11_128 = (__m128i*)&m_11[k<<3]; - m10_128 = (__m128i*)&m_10[k<<3]; - ext_128 = (__m128i*)&ext[k<<3]; - + m11_128 = (__m128i *)&m_11[k<<3]; + m10_128 = (__m128i *)&m_10[k<<3]; + ext_128 = (__m128i *)&ext[k<<3]; /* fprintf(fdsse4,"EXT %03d\n",k); print_shorts("a0:",&alpha_ptr[0]); @@ -944,7 +880,6 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, print_shorts("b6:",&beta_ptr[6]); print_shorts("b7:",&beta_ptr[7]); */ - //#ifndef __AVX2__ #if 1 m00_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; @@ -964,31 +899,23 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, m10_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m01_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; #else - - m00_1 = _mm_adds_epi16(alpha_ptr[0],beta_ptr[0]); //ALPHA_BETA_1m00; m10_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m11_1 = _mm_adds_epi16(alpha_ptr[0],beta_ptr[4]); //ALPHA_BETA_1m11; m01_1 = _mm_adds_epi16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; - m11_2 = _mm_adds_epi16(alpha_ptr[1],beta_ptr[0]); //ALPHA_BETA_2m11; m01_2 = _mm_adds_epi16(alpha_ptr[3],beta_ptr[1]); //ALPHA_BETA_2m01; m00_2 = _mm_adds_epi16(alpha_ptr[1],beta_ptr[4]); //ALPHA_BETA_2m00; m10_2 = _mm_adds_epi16(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; - m11_3 = _mm_adds_epi16(alpha_ptr[6],beta_ptr[3]); //ALPHA_BETA_3m11; m01_3 = _mm_adds_epi16(alpha_ptr[4],beta_ptr[2]); //ALPHA_BETA_3m01; m00_3 = _mm_adds_epi16(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; m10_3 = _mm_adds_epi16(alpha_ptr[4],beta_ptr[6]); //ALPHA_BETA_3m10; - m00_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m10_4 = _mm_adds_epi16(alpha_ptr[5],beta_ptr[2]); //ALPHA_BETA_4m10; 
m11_4 = _mm_adds_epi16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m01_4 = _mm_adds_epi16(alpha_ptr[5],beta_ptr[6]); //ALPHA_BETA_4m01; - - #endif - /* print_shorts("m11_1:",&m11_1); print_shorts("m11_2:",&m11_2); @@ -1019,36 +946,30 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, m11_1 = _mm_max_epi16(m11_1,m11_2); m11_1 = _mm_max_epi16(m11_1,m11_3); m11_1 = _mm_max_epi16(m11_1,m11_4); - // print_shorts("m11_1:",&m11_1); - m01_1 = _mm_subs_epi16(m01_1,*m10_128); m00_1 = _mm_subs_epi16(m00_1,*m11_128); m10_1 = _mm_adds_epi16(m10_1,*m10_128); m11_1 = _mm_adds_epi16(m11_1,*m11_128); - // print_shorts("m10_1:",&m10_1); // print_shorts("m11_1:",&m11_1); m01_1 = _mm_max_epi16(m01_1,m00_1); m10_1 = _mm_max_epi16(m10_1,m11_1); // print_shorts("m01_1:",&m01_1); // print_shorts("m10_1:",&m10_1); - *ext_128 = _mm_subs_epi16(m10_1,m01_1); #ifdef DEBUG_LOGMAP fprintf(fdsse4,"ext %p\n",ext_128); - print_shorts("ext:",(int16_t*)ext_128); - print_shorts("m11:",(int16_t*)m11_128); - print_shorts("m10:",(int16_t*)m10_128); - print_shorts("m10_1:",(int16_t*)&m10_1); - print_shorts("m01_1:",(int16_t*)&m01_1); + print_shorts("ext:",(int16_t *)ext_128); + print_shorts("m11:",(int16_t *)m11_128); + print_shorts("m10:",(int16_t *)m10_128); + print_shorts("m10_1:",(int16_t *)&m10_1); + print_shorts("m01_1:",(int16_t *)&m01_1); #endif - #elif defined(__arm__) - m11_128 = (int16x8_t*)&m_11[k<<3]; - m10_128 = (int16x8_t*)&m_10[k<<3]; - ext_128 = (int16x8_t*)&ext[k<<3]; - + m11_128 = (int16x8_t *)&m_11[k<<3]; + m10_128 = (int16x8_t *)&m_10[k<<3]; + ext_128 = (int16x8_t *)&ext[k<<3]; m00_4 = vqaddq_s16(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m11_4 = vqaddq_s16(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m00_3 = vqaddq_s16(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; @@ -1065,7 +986,6 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, m10_2 = vqaddq_s16(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_1 = 
vqaddq_s16(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m01_1 = vqaddq_s16(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; - m01_1 = vmaxq_s16(m01_1,m01_2); m01_1 = vmaxq_s16(m01_1,m01_3); m01_1 = vmaxq_s16(m01_1,m01_4); @@ -1078,18 +998,12 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, m11_1 = vmaxq_s16(m11_1,m11_2); m11_1 = vmaxq_s16(m11_1,m11_3); m11_1 = vmaxq_s16(m11_1,m11_4); - - m01_1 = vqsubq_s16(m01_1,*m10_128); m00_1 = vqsubq_s16(m00_1,*m11_128); m10_1 = vqaddq_s16(m10_1,*m10_128); m11_1 = vqaddq_s16(m11_1,*m11_128); - - m01_1 = vmaxq_s16(m01_1,m00_1); m10_1 = vmaxq_s16(m10_1,m11_1); - - *ext_128 = vqsubq_s16(m10_1,m01_1); #endif alpha_ptr+=8; @@ -1102,8 +1016,7 @@ void compute_ext16(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, //int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8], int *pi2tab16[188],*pi5tab16[188],*pi4tab16[188],*pi6tab16[188]; -void free_td16(void) -{ +void free_td16(void) { int ind; for (ind=0; ind<188; ind++) { @@ -1114,14 +1027,11 @@ void free_td16(void) } } -void init_td16(void) -{ - +void init_td16(void) { int ind,i,i2,i3,j,n,pi,pi3; - short * base_interleaver; + short *base_interleaver; for (ind=0; ind<188; ind++) { - n = f1f2mat[ind].nb_bits; base_interleaver=il_tb+f1f2mat[ind].beg_index; #ifdef MEX @@ -1141,10 +1051,8 @@ void init_td16(void) j=i2; for (i3=0; i3<(n>>3); i3++,i++,j+=8) { - // if (j>=n) // j-=(n-1); - pi2tab16[ind][i] = j; // fprintf(fdsse4,"pi2[%d] = %d\n",i,j); } @@ -1157,71 +1065,59 @@ void init_td16(void) pi5tab16[ind][pi3] = pi2tab16[ind][i]; pi6tab16[ind][pi] = pi2tab16[ind][i]; } - } } uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, - int16_t *y2, - uint8_t *decoded_bytes, - uint8_t *decoded_bytes2, - uint16_t n, - uint8_t max_iterations, - uint8_t crc_type, - uint8_t F, - time_stats_t *init_stats, - time_stats_t *alpha_stats, - time_stats_t *beta_stats, - time_stats_t *gamma_stats, - time_stats_t *ext_stats, - time_stats_t *intl1_stats, - time_stats_t 
*intl2_stats) { - + int16_t *y2, + uint8_t *decoded_bytes, + uint8_t *decoded_bytes2, + uint16_t n, + uint8_t max_iterations, + uint8_t crc_type, + uint8_t F, + time_stats_t *init_stats, + time_stats_t *alpha_stats, + time_stats_t *beta_stats, + time_stats_t *gamma_stats, + time_stats_t *ext_stats, + time_stats_t *intl1_stats, + time_stats_t *intl2_stats) { /* y is a pointer to the input decoded_bytes is a pointer to the decoded output n is the size in bits of the coded block, with the tail */ - - llr_t systematic0[n+16] __attribute__ ((aligned(32))); llr_t systematic1[n+16] __attribute__ ((aligned(32))); llr_t systematic2[n+16] __attribute__ ((aligned(32))); llr_t yparity1[n+16] __attribute__ ((aligned(32))); llr_t yparity2[n+16] __attribute__ ((aligned(32))); - llr_t ext[n+128] __attribute__((aligned(32))); llr_t ext2[n+128] __attribute__((aligned(32))); - llr_t alpha[(n+16)*8] __attribute__ ((aligned(32))); llr_t beta[(n+16)*8] __attribute__ ((aligned(32))); llr_t m11[n+32] __attribute__ ((aligned(32))); llr_t m10[n+32] __attribute__ ((aligned(32))); - - int *pi2_p,*pi4_p,*pi5_p,*pi6_p; llr_t *s,*s1,*s2,*yp1,*yp2,*yp; unsigned int i,j,iind;//,pi; unsigned char iteration_cnt=0; unsigned int crc,oldcrc,crc_len; uint8_t temp; - #if defined(__x86_64__) || defined(__i386__) __m128i *yp128; __m128i tmp, zeros=_mm_setzero_si128(); __m128i tmpe; #elif defined(__arm__) int16x8_t *yp128; -// int16x8_t tmp128[(n+8)>>3]; + // int16x8_t tmp128[(n+8)>>3]; int16x8_t tmp, zeros=vdupq_n_s16(0); - const uint16_t __attribute__ ((aligned (16))) _Powers[8]= - { 1, 2, 4, 8, 16, 32, 64, 128}; + const uint16_t __attribute__ ((aligned (16))) _Powers[8]= + { 1, 2, 4, 8, 16, 32, 64, 128}; uint16x8_t Powers= vld1q_u16(_Powers); #endif int offset8_flag=0; - #ifdef DEBUG_LOGMAP fdsse4 = fopen("dump_sse4.txt","w"); - - printf("tc sse4_16 (y) %p\n",y); #endif @@ -1230,12 +1126,8 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, return 255; } - - start_meas(init_stats); - - for (iind=0; 
iind < 188 && f1f2mat[iind].nb_bits != n; iind++); if ( iind == 188 ) { @@ -1244,50 +1136,41 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, } switch (crc_type) { - case CRC24_A: - case CRC24_B: - crc_len=3; - break; + case CRC24_A: + case CRC24_B: + crc_len=3; + break; - case CRC16: - crc_len=2; - break; + case CRC16: + crc_len=2; + break; - case CRC8: - crc_len=1; - break; + case CRC8: + crc_len=1; + break; - default: - crc_len=3; + default: + crc_len=3; } #if defined(__x86_64__) || defined(__i386__) - yp128 = (__m128i*)y; + yp128 = (__m128i *)y; #elif defined(__arm__) - yp128 = (int16x8_t*)y; + yp128 = (int16x8_t *)y; #endif - - - - s = systematic0; s1 = systematic1; s2 = systematic2; yp1 = yparity1; yp2 = yparity2; - - for (i=0; i<n; i+=8) { pi2_p = &pi2tab16[iind][i]; - j=pi2_p[0]; - #if defined(__x86_64__) || defined(__i386__) tmpe = _mm_load_si128(yp128); // fprintf(fdsse4,"yp128 %p\n",yp128); // print_shorts("tmpe",(int16_t *)&tmpe); - s[j] = _mm_extract_epi16(tmpe,0); yp1[j] = _mm_extract_epi16(tmpe,1); yp2[j] = _mm_extract_epi16(tmpe,2); @@ -1295,7 +1178,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, fprintf(fdsse4,"init0: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); #endif j=pi2_p[1]; - s[j] = _mm_extract_epi16(tmpe,3); yp1[j] = _mm_extract_epi16(tmpe,4); yp2[j] = _mm_extract_epi16(tmpe,5); @@ -1303,7 +1185,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, fprintf(fdsse4,"init1: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); #endif j=pi2_p[2]; - s[j] = _mm_extract_epi16(tmpe,6); yp1[j] = _mm_extract_epi16(tmpe,7); tmpe = _mm_load_si128(&yp128[1]); @@ -1312,7 +1193,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, fprintf(fdsse4,"init2: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); #endif j=pi2_p[3]; - s[j] = _mm_extract_epi16(tmpe,1); yp1[j] = _mm_extract_epi16(tmpe,2); yp2[j] = _mm_extract_epi16(tmpe,3); @@ -1320,7 +1200,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, 
fprintf(fdsse4,"init3: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); #endif j=pi2_p[4]; - s[j] = _mm_extract_epi16(tmpe,4); yp1[j] = _mm_extract_epi16(tmpe,5); yp2[j] = _mm_extract_epi16(tmpe,6); @@ -1328,7 +1207,6 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, fprintf(fdsse4,"init4: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); #endif j=pi2_p[5]; - s[j] = _mm_extract_epi16(tmpe,7); tmpe = _mm_load_si128(&yp128[2]); yp1[j] = _mm_extract_epi16(tmpe,0); @@ -1336,9 +1214,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, #ifdef DEBUG_LOGMAP fprintf(fdsse4,"init5: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); #endif - j=pi2_p[6]; - s[j] = _mm_extract_epi16(tmpe,2); yp1[j] = _mm_extract_epi16(tmpe,3); yp2[j] = _mm_extract_epi16(tmpe,4); @@ -1346,60 +1222,49 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, fprintf(fdsse4,"init6: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); #endif j=pi2_p[7]; - s[j] = _mm_extract_epi16(tmpe,5); yp1[j] = _mm_extract_epi16(tmpe,6); yp2[j] = _mm_extract_epi16(tmpe,7); #ifdef DEBUG_LOGMAP fprintf(fdsse4,"init7: j %d, s[j] %d yp1[j] %d yp2[j] %d\n",j,s[j],yp1[j],yp2[j]); #endif - #elif defined(__arm__) s[j] = vgetq_lane_s16(yp128[0],0); yp1[j] = vgetq_lane_s16(yp128[0],1); yp2[j] = vgetq_lane_s16(yp128[0],2); - j=pi2_p[1]; s[j] = vgetq_lane_s16(yp128[0],3); yp1[j] = vgetq_lane_s16(yp128[0],4); yp2[j] = vgetq_lane_s16(yp128[0],5); - j=pi2_p[2]; s[j] = vgetq_lane_s16(yp128[0],6); yp1[j] = vgetq_lane_s16(yp128[0],7); yp2[j] = vgetq_lane_s16(yp128[1],0); - j=pi2_p[3]; s[j] = vgetq_lane_s16(yp128[1],1); yp1[j] = vgetq_lane_s16(yp128[1],2); yp2[j] = vgetq_lane_s16(yp128[1],3); - j=pi2_p[4]; s[j] = vgetq_lane_s16(yp128[1],4); yp1[j] = vgetq_lane_s16(yp128[1],5); yp2[j] = vgetq_lane_s16(yp128[1],6); - j=pi2_p[5]; s[j] = vgetq_lane_s16(yp128[1],7); yp1[j] = vgetq_lane_s16(yp128[2],0); yp2[j] = vgetq_lane_s16(yp128[2],1); - j=pi2_p[6]; s[j] = vgetq_lane_s16(yp128[2],2); yp1[j] 
= vgetq_lane_s16(yp128[2],3); yp2[j] = vgetq_lane_s16(yp128[2],4); - j=pi2_p[7]; s[j] = vgetq_lane_s16(yp128[2],5); yp1[j] = vgetq_lane_s16(yp128[2],6); yp2[j] = vgetq_lane_s16(yp128[2],7); #endif yp128+=3; - } - yp=(llr_t*)yp128; - + yp=(llr_t *)yp128; // Termination for (i=n; i<n+3; i++) { @@ -1410,7 +1275,7 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, yp1[i] = *yp; yp++; #ifdef DEBUG_LOGMAP - fprintf(fdsse4,"Term 1 (%d): %d %d\n",i,s[i],yp1[i]); + fprintf(fdsse4,"Term 1 (%u): %d %d\n",i,s[i],yp1[i]); #endif //DEBUG_LOGMAP } @@ -1422,32 +1287,25 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, yp2[i-8] = *yp; yp++; #ifdef DEBUG_LOGMAP - fprintf(fdsse4,"Term 2 (%d): %d %d\n",i-3,s[i],yp2[i-8]); + fprintf(fdsse4,"Term 2 (%u): %d %d\n",i-3,s[i],yp2[i-8]); #endif //DEBUG_LOGMAP } #ifdef DEBUG_LOGMAP fprintf(fdsse4,"\n"); #endif //DEBUG_LOGMAP - stop_meas(init_stats); - // do log_map from first parity bit - log_map16(systematic0,yparity1,m11,m10,alpha,beta,ext,n,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); while (iteration_cnt++ < max_iterations) { - #ifdef DEBUG_LOGMAP fprintf(fdsse4,"\n*******************ITERATION %d (n %d), ext %p\n\n",iteration_cnt,n,ext); #endif //DEBUG_LOGMAP - start_meas(intl1_stats); - pi4_p=pi4tab16[iind]; for (i=0; i<(n>>3); i++) { // steady-state portion - #if defined(__x86_64__) || defined(__i386__) ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],0); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],1); @@ -1457,30 +1315,24 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],5); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],6); ((__m128i *)systematic2)[i]=_mm_insert_epi16(((__m128i *)systematic2)[i],ext[*pi4_p++],7); - #elif defined(__arm__) - 
((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],0); - ((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],1); - ((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],2); - ((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],3); - ((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],4); - ((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],5); - ((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],6); - ((int16x8_t*)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t*)systematic2)[i],7); + ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],0); + ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],1); + ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],2); + ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],3); + ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],4); + ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],5); + ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],6); + ((int16x8_t *)systematic2)[i]=vsetq_lane_s16(ext[*pi4_p++],((int16x8_t *)systematic2)[i],7); #endif #ifdef DEBUG_LOGMAP - print_shorts("syst2",(int16_t*)&((__m128i *)systematic2)[i]); + print_shorts("syst2",(int16_t *)&((__m128i *)systematic2)[i]); #endif } stop_meas(intl1_stats); - // do log_map from second parity bit - log_map16(systematic2,yparity2,m11,m10,alpha,beta,ext2,n,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); - - - pi5_p=pi5tab16[iind]; for (i=0; i<(n>>3); i++) { @@ -1493,20 +1345,20 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, 
tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],5); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],6); tmp=_mm_insert_epi16(tmp,ext2[*pi5_p++],7); - ((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]); + ((__m128i *)systematic1)[i] = _mm_adds_epi16(_mm_subs_epi16(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]); #elif defined(__arm__) - tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,0); - tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,1); - tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,2); - tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,3); - tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,4); - tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,5); - tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,6); - tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,7); - ((int16x8_t *)systematic1)[i] = vqaddq_s16(vqsubq_s16(tmp,((int16x8_t*)ext)[i]),((int16x8_t *)systematic0)[i]); + tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,0); + tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,1); + tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,2); + tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,3); + tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,4); + tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,5); + tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,6); + tmp=vsetq_lane_s16(ext2[*pi5_p++],tmp,7); + ((int16x8_t *)systematic1)[i] = vqaddq_s16(vqsubq_s16(tmp,((int16x8_t *)ext)[i]),((int16x8_t *)systematic0)[i]); #endif #ifdef DEBUG_LOGMAP - print_shorts("syst1",(int16_t*)&((__m128i *)systematic1)[i]); + print_shorts("syst1",(int16_t *)&((__m128i *)systematic1)[i]); #endif } @@ -1516,16 +1368,16 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, for (i=0; i<(n>>3); i++) { #if defined(__x86_64__) || defined(__i386__) - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],7); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],6); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],5); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],4); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],3); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],2); - 
tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],1); - tmp=_mm_insert_epi16(tmp, ((llr_t*)ext2)[*pi6_p++],0); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],7); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],6); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],5); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],4); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],3); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],2); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],1); + tmp=_mm_insert_epi16(tmp, ((llr_t *)ext2)[*pi6_p++],0); #ifdef DEBUG_LOGMAP - print_shorts("tmp",(int16_t*)&tmp); + print_shorts("tmp",(int16_t *)&tmp); #endif tmp=_mm_cmpgt_epi8(_mm_packs_epi16(tmp,zeros),zeros); decoded_bytes[i]=(unsigned char)_mm_movemask_epi8(tmp); @@ -1538,18 +1390,18 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,2); tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,1); tmp=vsetq_lane_s16(ext2[*pi6_p++],tmp,0); -// This does: -// [1 2 4 8 16 32 64 128] .* I(ext_i > 0) = 2.^[b0 b1 b2 b3 b4 b5 b6 b7], where bi =I(ext_i > 0) -// [2^b0 + 2^b1 2^b2 + 2^b3 2^b4 + 2^b5 2^b6 + 2^b7] -// [2^b0 + 2^b1 + 2^b2 + 2^b3 2^b4 + 2^b5 + 2^b6 + 2^b7] -// Mask64 = 2^b0 + 2^b1 + 2^b2 + 2^b3 + 2^b4 + 2^b5 + 2^b6 + 2^b7 - uint64x2_t Mask = vpaddlq_u32(vpaddlq_u16(vandq_u16(vcgtq_s16(tmp,zeros), Powers))); + // This does: + // [1 2 4 8 16 32 64 128] .* I(ext_i > 0) = 2.^[b0 b1 b2 b3 b4 b5 b6 b7], where bi =I(ext_i > 0) + // [2^b0 + 2^b1 2^b2 + 2^b3 2^b4 + 2^b5 2^b6 + 2^b7] + // [2^b0 + 2^b1 + 2^b2 + 2^b3 2^b4 + 2^b5 + 2^b6 + 2^b7] + // Mask64 = 2^b0 + 2^b1 + 2^b2 + 2^b3 + 2^b4 + 2^b5 + 2^b6 + 2^b7 + uint64x2_t Mask = vpaddlq_u32(vpaddlq_u16(vandq_u16(vcgtq_s16(tmp,zeros), Powers))); uint64x1_t Mask64 = vget_high_u64(Mask)+vget_low_u64(Mask); decoded_bytes[i] = (uint8_t)Mask64; #endif #ifdef DEBUG_LOGMAP - print_shorts("tmp",(int16_t*)&tmp); - fprintf(fdsse4,"decoded_bytes[%d] %x\n",i,decoded_bytes[i]); + 
print_shorts("tmp",(int16_t *)&tmp); + fprintf(fdsse4,"decoded_bytes[%u] %x\n",i,decoded_bytes[i]); #endif } } @@ -1559,41 +1411,40 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len])); switch (crc_type) { - - case CRC24_A: - oldcrc&=0x00ffffff; - crc = crc24a(&decoded_bytes[F>>3], - n-24-F)>>8; - temp=((uint8_t *)&crc)[2]; - ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; - ((uint8_t *)&crc)[0] = temp; - break; - - case CRC24_B: - oldcrc&=0x00ffffff; - crc = crc24b(decoded_bytes, - n-24)>>8; - temp=((uint8_t *)&crc)[2]; - ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; - ((uint8_t *)&crc)[0] = temp; - break; - - case CRC16: - oldcrc&=0x0000ffff; - crc = crc16(decoded_bytes, - n-16)>>16; - break; - - case CRC8: - oldcrc&=0x000000ff; - crc = crc8(decoded_bytes, - n-8)>>24; - break; - - default: - printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); - return(255); - break; + case CRC24_A: + oldcrc&=0x00ffffff; + crc = crc24a(&decoded_bytes[F>>3], + n-24-F)>>8; + temp=((uint8_t *)&crc)[2]; + ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; + ((uint8_t *)&crc)[0] = temp; + break; + + case CRC24_B: + oldcrc&=0x00ffffff; + crc = crc24b(decoded_bytes, + n-24)>>8; + temp=((uint8_t *)&crc)[2]; + ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; + ((uint8_t *)&crc)[0] = temp; + break; + + case CRC16: + oldcrc&=0x0000ffff; + crc = crc16(decoded_bytes, + n-16)>>16; + break; + + case CRC8: + oldcrc&=0x000000ff; + crc = crc8(decoded_bytes, + n-8)>>24; + break; + + default: + printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); + return(255); + break; } stop_meas(intl2_stats); @@ -1610,13 +1461,13 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, if (iteration_cnt < max_iterations) { log_map16(systematic1,yparity1,m11,m10,alpha,beta,ext,n,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); #if defined(__x86_64__) || defined(__i386__) - __m128i* ext_128=(__m128i*) ext; - __m128i* s1_128=(__m128i*) 
systematic1; - __m128i* s0_128=(__m128i*) systematic0; + __m128i *ext_128=(__m128i *) ext; + __m128i *s1_128=(__m128i *) systematic1; + __m128i *s0_128=(__m128i *) systematic0; #elif defined(__arm__) - int16x8_t* ext_128=(int16x8_t*) ext; - int16x8_t* s1_128=(int16x8_t*) systematic1; - int16x8_t* s0_128=(int16x8_t*) systematic0; + int16x8_t *ext_128=(int16x8_t *) ext; + int16x8_t *s1_128=(int16x8_t *) systematic1; + int16x8_t *s0_128=(int16x8_t *) systematic0; #endif int myloop=n>>3; @@ -1630,13 +1481,11 @@ uint8_t phy_threegpplte_turbo_decoder16(int16_t *y, } } } - - // fprintf(fdsse4,"crc %x, oldcrc %x\n",crc,oldcrc); + // fprintf(fdsse4,"crc %x, oldcrc %x\n",crc,oldcrc); #ifdef DEBUG_LOGMAP - fclose(fdsse4); + fclose(fdsse4); #endif - #if defined(__x86_64__) || defined(__i386__) _mm_empty(); _m_empty(); diff --git a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c index 476e79cacb7..e1ca906b55b 100644 --- a/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c +++ b/openair1/PHY/CODING/3gpplte_turbo_decoder_sse_8bit.c @@ -39,39 +39,39 @@ #include "PHY/sse_intrin.h" #ifndef TEST_DEBUG -#include "PHY/defs_common.h" -#include "PHY/CODING/coding_defs.h" -#include "PHY/CODING/lte_interleaver_inline.h" + #include "PHY/defs_common.h" + #include "PHY/CODING/coding_defs.h" + #include "PHY/CODING/lte_interleaver_inline.h" #else -#include "defs.h" -#include <stdio.h> -#include <stdlib.h> -#include <string.h> + #include "defs.h" + #include <stdio.h> + #include <stdlib.h> + #include <string.h> #endif #ifdef MEX -#include "mex.h" + #include "mex.h" #endif #include "common/ran_context.h" #define SHUFFLE16(a,b,c,d,e,f,g,h) _mm_set_epi8(h==-1?-1:h*2+1, \ - h==-1?-1:h*2, \ - g==-1?-1:g*2+1, \ - g==-1?-1:g*2, \ - f==-1?-1:f*2+1, \ - f==-1?-1:f*2, \ - e==-1?-1:e*2+1, \ - e==-1?-1:e*2, \ - d==-1?-1:d*2+1, \ - d==-1?-1:d*2, \ - c==-1?-1:c*2+1, \ - c==-1?-1:c*2, \ - b==-1?-1:b*2+1, \ - b==-1?-1:b*2, \ - a==-1?-1:a*2+1, \ - 
a==-1?-1:a*2); + h==-1?-1:h*2, \ + g==-1?-1:g*2+1, \ + g==-1?-1:g*2, \ + f==-1?-1:f*2+1, \ + f==-1?-1:f*2, \ + e==-1?-1:e*2+1, \ + e==-1?-1:e*2, \ + d==-1?-1:d*2+1, \ + d==-1?-1:d*2, \ + c==-1?-1:c*2+1, \ + c==-1?-1:c*2, \ + b==-1?-1:b*2+1, \ + b==-1?-1:b*2, \ + a==-1?-1:a*2+1, \ + a==-1?-1:a*2); @@ -86,32 +86,28 @@ typedef int8_t channel_t; #define MAX8 127 -void log_map8(llr_t* systematic,channel_t* y_parity, llr_t* m11, llr_t* m10, llr_t *alpha, llr_t *beta, llr_t* ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,int offset8_flag, +void log_map8(llr_t *systematic,channel_t *y_parity, llr_t *m11, llr_t *m10, llr_t *alpha, llr_t *beta, llr_t *ext,unsigned short frame_length,unsigned char term_flag,unsigned char F,int offset8_flag, time_stats_t *alpha_stats,time_stats_t *beta_stats,time_stats_t *gamma_stats,time_stats_t *ext_stats); -void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic, channel_t* y_parity, unsigned short frame_length,unsigned char term_flag); -void compute_alpha8(llr_t*alpha,llr_t *beta, llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F); -void compute_beta8(llr_t*alpha, llr_t* beta,llr_t* m11,llr_t* m10, unsigned short frame_length,unsigned char F,int offset8_flag); -void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m11,llr_t* m10,llr_t* extrinsic, llr_t* ap, unsigned short frame_length); - - -void print_bytes(char *s, int8_t *x) -{ +void compute_gamma8(llr_t *m11,llr_t *m10,llr_t *systematic, channel_t *y_parity, unsigned short frame_length,unsigned char term_flag); +void compute_alpha8(llr_t *alpha,llr_t *beta, llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F); +void compute_beta8(llr_t *alpha, llr_t *beta,llr_t *m11,llr_t *m10, unsigned short frame_length,unsigned char F,int offset8_flag); +void compute_ext8(llr_t *alpha,llr_t *beta,llr_t *m11,llr_t *m10,llr_t *extrinsic, llr_t *ap, unsigned short frame_length); +void print_bytes(char *s, int8_t *x) { printf("%s : 
%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d\n",s, x[0],x[1],x[2],x[3],x[4],x[5],x[6],x[7], x[8],x[9],x[10],x[11],x[12],x[13],x[14],x[15]); - } -void log_map8(llr_t* systematic, - channel_t* y_parity, - llr_t* m11, - llr_t* m10, +void log_map8(llr_t *systematic, + channel_t *y_parity, + llr_t *m11, + llr_t *m10, llr_t *alpha, llr_t *beta, - llr_t* ext, + llr_t *ext, unsigned short frame_length, unsigned char term_flag, unsigned char F, @@ -119,32 +115,38 @@ void log_map8(llr_t* systematic, time_stats_t *alpha_stats, time_stats_t *beta_stats, time_stats_t *gamma_stats, - time_stats_t *ext_stats) -{ - + time_stats_t *ext_stats) { #ifdef DEBUG_LOGMAP printf("log_map, frame_length %d\n",frame_length); #endif if (gamma_stats) start_meas(gamma_stats) ; + compute_gamma8(m11,m10,systematic,y_parity,frame_length,term_flag) ; + if (gamma_stats) stop_meas(gamma_stats); + if (alpha_stats) start_meas(alpha_stats) ; + compute_alpha8(alpha,beta,m11,m10,frame_length,F) ; + if (alpha_stats) stop_meas(alpha_stats); + if (beta_stats) start_meas(beta_stats) ; + compute_beta8(alpha,beta,m11,m10,frame_length,F,offset8_flag) ; + if (beta_stats) stop_meas(beta_stats); + if (ext_stats) start_meas(ext_stats) ; - compute_ext8(alpha,beta,m11,m10,ext,systematic,frame_length) ; - if (ext_stats) stop_meas(ext_stats); + compute_ext8(alpha,beta,m11,m10,ext,systematic,frame_length) ; + if (ext_stats) stop_meas(ext_stats); } -void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, - unsigned short frame_length,unsigned char term_flag) -{ +void compute_gamma8(llr_t *m11,llr_t *m10,llr_t *systematic,channel_t *y_parity, + unsigned short frame_length,unsigned char term_flag) { int k,K1; #if defined(__x86_64__)||defined(__i386__) __m128i *systematic128 = (__m128i *)systematic; @@ -157,11 +159,9 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, int8x16_t *m10_128 = (int8x16_t *)m10; int8x16_t *m11_128 = (int8x16_t *)m11; #endif - #ifdef 
DEBUG_LOGMAP printf("compute_gamma, %p,%p,%p,%p,framelength %d\n",m11,m10,systematic,y_parity,frame_length); #endif - #if defined(__x86_64__) || defined(__i386__) register __m128i sl,sh,ypl,yph; //K128=_mm_set1_epi8(-128); #endif @@ -181,11 +181,9 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, m11_128[k] = vhaddq_s8(systematic128[k],y_parity128[k]); m10_128[k] = vhsubq_s8(systematic128[k],y_parity128[k]); #endif - } // Termination - #if defined(__x86_64__) || defined(__i386__) sl = _mm_cvtepi8_epi16(systematic128[k+term_flag]); sh = _mm_cvtepi8_epi16(_mm_srli_si128(systematic128[k],8)); @@ -199,15 +197,12 @@ void compute_gamma8(llr_t* m11,llr_t* m10,llr_t* systematic,channel_t* y_parity, m11_128[k] = vhaddq_s8(systematic128[k+term_flag],y_parity128[k]); m10_128[k] = vhsubq_s8(systematic128[k+term_flag],y_parity128[k]); #endif - } #define L 16 -void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned short frame_length,unsigned char F) -{ +void compute_alpha8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F) { int k,loopval,rerun_flag; - #if defined(__x86_64__) || defined(__i386__) __m128i *alpha128=(__m128i *)alpha,*alpha_ptr; __m128i *m11p,*m10p; @@ -223,7 +218,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh #endif // Set initial state: first colum is known // the other columns are unknown, so all states are set to same value - #if defined(__x86_64__) || defined(__i386__) alpha128[0] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,0); alpha128[1] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2); @@ -233,12 +227,11 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh alpha128[5] = 
_mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2); alpha128[6] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2); alpha128[7] = _mm_set_epi8(-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2,-MAX8/2); - for (loopval=frame_length>>4, rerun_flag=0; rerun_flag<2; loopval=L, rerun_flag++) { + for (loopval=frame_length>>4, rerun_flag=0; rerun_flag<2; loopval=L, rerun_flag++) { alpha_ptr = &alpha128[0]; - - m11p = (__m128i*)m_11; - m10p = (__m128i*)m_10; + m11p = (__m128i *)m_11; + m10p = (__m128i *)m_10; for (k=0; k<loopval; k++) { m_b0 = _mm_adds_epi8(alpha_ptr[1],*m11p); // m11 @@ -249,7 +242,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh m_b6 = _mm_subs_epi8(alpha_ptr[5],*m10p); // m01=-m10 m_b3 = _mm_subs_epi8(alpha_ptr[7],*m11p); // m00=-m11 m_b7 = _mm_adds_epi8(alpha_ptr[7],*m11p); // m11 - new0 = _mm_subs_epi8(alpha_ptr[0],*m11p); // m00=-m11 new4 = _mm_adds_epi8(alpha_ptr[0],*m11p); // m11 new1 = _mm_adds_epi8(alpha_ptr[2],*m10p); // m10 @@ -258,7 +250,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh new6 = _mm_adds_epi8(alpha_ptr[4],*m10p); // m10 new3 = _mm_adds_epi8(alpha_ptr[6],*m11p); // m11 new7 = _mm_subs_epi8(alpha_ptr[6],*m11p); // m00=-m11 - alpha_ptr += 8; m11p++; m10p++; @@ -270,7 +261,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh alpha_ptr[5] = _mm_max_epi8(m_b5,new5); alpha_ptr[6] = _mm_max_epi8(m_b6,new6); alpha_ptr[7] = _mm_max_epi8(m_b7,new7); - // compute and subtract maxima alpha_max = _mm_max_epi8(alpha_ptr[0],alpha_ptr[1]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[2]); @@ -279,7 +269,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh 
alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[5]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[6]); alpha_max = _mm_max_epi8(alpha_max,alpha_ptr[7]); - alpha_ptr[0] = _mm_subs_epi8(alpha_ptr[0],alpha_max); alpha_ptr[1] = _mm_subs_epi8(alpha_ptr[1],alpha_max); alpha_ptr[2] = _mm_subs_epi8(alpha_ptr[2],alpha_max); @@ -308,8 +297,8 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh alpha[80] = -MAX8/2; alpha[96] = -MAX8/2; alpha[112] = -MAX8/2; - } + #elif defined(__arm__) alpha128[0] = vdupq_n_s8(-MAX8/2); alpha128[0] = vsetq_lane_s8(0,alpha128[0],0); @@ -320,12 +309,11 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh alpha128[5] = vdupq_n_s8(-MAX8/2); alpha128[6] = vdupq_n_s8(-MAX8/2); alpha128[7] = vdupq_n_s8(-MAX8/2); - for (loopval=frame_length>>4, rerun_flag=0; rerun_flag<2; loopval=L, rerun_flag++) { + for (loopval=frame_length>>4, rerun_flag=0; rerun_flag<2; loopval=L, rerun_flag++) { alpha_ptr = &alpha128[0]; - - m11p = (int8x16_t*)m_11; - m10p = (int8x16_t*)m_10; + m11p = (int8x16_t *)m_11; + m10p = (int8x16_t *)m_10; for (k=0; k<loopval; k++) { m_b0 = vqaddq_s8(alpha_ptr[1],*m11p); // m11 @@ -336,7 +324,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh m_b6 = vqsubq_s8(alpha_ptr[5],*m10p); // m01=-m10 m_b3 = vqsubq_s8(alpha_ptr[7],*m11p); // m00=-m11 m_b7 = vqaddq_s8(alpha_ptr[7],*m11p); // m11 - new0 = vqsubq_s8(alpha_ptr[0],*m11p); // m00=-m11 new4 = vqaddq_s8(alpha_ptr[0],*m11p); // m11 new1 = vqaddq_s8(alpha_ptr[2],*m10p); // m10 @@ -345,7 +332,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh new6 = vqaddq_s8(alpha_ptr[4],*m10p); // m10 new3 = vqaddq_s8(alpha_ptr[6],*m11p); // m11 new7 = vqsubq_s8(alpha_ptr[6],*m11p); // m00=-m11 - alpha_ptr += 8; m11p++; m10p++; @@ -357,7 +343,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh alpha_ptr[5] = vmaxq_s8(m_b5,new5); alpha_ptr[6] = 
vmaxq_s8(m_b6,new6); alpha_ptr[7] = vmaxq_s8(m_b7,new7); - // compute and subtract maxima alpha_max = vmaxq_s8(alpha_ptr[0],alpha_ptr[1]); alpha_max = vmaxq_s8(alpha_max,alpha_ptr[2]); @@ -366,7 +351,6 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh alpha_max = vmaxq_s8(alpha_max,alpha_ptr[5]); alpha_max = vmaxq_s8(alpha_max,alpha_ptr[6]); alpha_max = vmaxq_s8(alpha_max,alpha_ptr[7]); - alpha_ptr[0] = vqsubq_s8(alpha_ptr[0],alpha_max); alpha_ptr[1] = vqsubq_s8(alpha_ptr[1],alpha_max); alpha_ptr[2] = vqsubq_s8(alpha_ptr[2],alpha_max); @@ -380,14 +364,22 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh // Set intial state for next iteration from the last state // as a column end states are the first states of the next column int K1= frame_length>>1; - alpha128[0] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[K1],8); alpha128[0] = vsetq_lane_s8(alpha[8],alpha128[0],7); - alpha128[1] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[1+K1],8); alpha128[1] = vsetq_lane_s8(alpha[24],alpha128[0],7); - alpha128[2] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[2+K1],8); alpha128[2] = vsetq_lane_s8(alpha[40],alpha128[0],7); - alpha128[3] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[3+K1],8); alpha128[3] = vsetq_lane_s8(alpha[56],alpha128[0],7); - alpha128[4] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[4+K1],8); alpha128[4] = vsetq_lane_s8(alpha[72],alpha128[0],7); - alpha128[5] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[5+K1],8); alpha128[5] = vsetq_lane_s8(alpha[88],alpha128[0],7); - alpha128[6] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[6+K1],8); alpha128[6] = vsetq_lane_s8(alpha[104],alpha128[0],7); - alpha128[7] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[7+K1],8); alpha128[7] = vsetq_lane_s8(alpha[120],alpha128[0],7); + alpha128[0] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[K1],8); + alpha128[0] = vsetq_lane_s8(alpha[8],alpha128[0],7); + alpha128[1] = 
(int8x16_t)vshlq_n_s64((int64x2_t)alpha128[1+K1],8); + alpha128[1] = vsetq_lane_s8(alpha[24],alpha128[0],7); + alpha128[2] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[2+K1],8); + alpha128[2] = vsetq_lane_s8(alpha[40],alpha128[0],7); + alpha128[3] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[3+K1],8); + alpha128[3] = vsetq_lane_s8(alpha[56],alpha128[0],7); + alpha128[4] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[4+K1],8); + alpha128[4] = vsetq_lane_s8(alpha[72],alpha128[0],7); + alpha128[5] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[5+K1],8); + alpha128[5] = vsetq_lane_s8(alpha[88],alpha128[0],7); + alpha128[6] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[6+K1],8); + alpha128[6] = vsetq_lane_s8(alpha[104],alpha128[0],7); + alpha128[7] = (int8x16_t)vshlq_n_s64((int64x2_t)alpha128[7+K1],8); + alpha128[7] = vsetq_lane_s8(alpha[120],alpha128[0],7); alpha[16] = -MAX8/2; alpha[32] = -MAX8/2; alpha[48] = -MAX8/2; @@ -395,35 +387,28 @@ void compute_alpha8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,unsigned sh alpha[80] = -MAX8/2; alpha[96] = -MAX8/2; alpha[112] = -MAX8/2; - } -#endif - +#endif } -void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned short frame_length,unsigned char F,int offset8_flag) -{ - +void compute_beta8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,unsigned short frame_length,unsigned char F,int offset8_flag) { int k,rerun_flag, loopval; #if defined(__x86_64__) || defined(__i386__) __m128i m11_128,m10_128; __m128i m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; __m128i new0,new1,new2,new3,new4,new5,new6,new7; - __m128i *beta128,*alpha128,*beta_ptr; __m128i beta_max; #elif defined(__arm__) int8x16_t m11_128,m10_128; int8x16_t m_b0,m_b1,m_b2,m_b3,m_b4,m_b5,m_b6,m_b7; int8x16_t new0,new1,new2,new3,new4,new5,new6,new7; - int8x16_t *beta128,*alpha128,*beta_ptr; int8x16_t beta_max; #endif llr_t beta0,beta1; - llr_t beta2,beta3,beta4,beta5,beta6,beta7; if (frame_length > 6144) { @@ -433,13 +418,12 @@ void compute_beta8(llr_t* 
alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho // we are supposed to run compute_alpha just before compute_beta // so the initial states of backward computation can be set from last value of alpha states (forward computation) - #if defined(__x86_64__) || defined(__i386__) - beta_ptr = (__m128i*)&beta[frame_length<<3]; - alpha128 = (__m128i*)&alpha[0]; + beta_ptr = (__m128i *)&beta[frame_length<<3]; + alpha128 = (__m128i *)&alpha[0]; #elif defined(__arm__) - beta_ptr = (int8x16_t*)&beta[frame_length<<3]; - alpha128 = (int8x16_t*)&alpha[0]; + beta_ptr = (int8x16_t *)&beta[frame_length<<3]; + alpha128 = (int8x16_t *)&alpha[0]; #endif beta_ptr[0] = alpha128[(frame_length>>1)]; beta_ptr[1] = alpha128[1+(frame_length>>1)]; @@ -449,18 +433,15 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho beta_ptr[5] = alpha128[5+(frame_length>>1)]; beta_ptr[6] = alpha128[6+(frame_length>>1)]; beta_ptr[7] = alpha128[7+(frame_length>>1)]; - int overlap = (frame_length>>4)> L ? (frame_length>>4)-L : 0 ; for (rerun_flag=0, loopval=0; rerun_flag<2 ; loopval=overlap,rerun_flag++) { - if (offset8_flag==0) { // FIXME! beta0-beta7 are used uninitialized. FIXME! 
// workaround: init with 0 beta0 = beta1 = beta2 = beta3 = beta4 = beta5 = beta6 = beta7 = 0; - #if defined(__x86_64__) || defined(__i386__) beta_ptr[0] = _mm_insert_epi8(beta_ptr[0],beta0,15); beta_ptr[1] = _mm_insert_epi8(beta_ptr[1],beta1,15); @@ -483,16 +464,17 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho } #if defined(__x86_64__) || defined(__i386__) - beta_ptr = (__m128i*)&beta[frame_length<<3]; + beta_ptr = (__m128i *)&beta[frame_length<<3]; #elif defined(__arm__) - beta_ptr = (int8x16_t*)&beta[frame_length<<3]; + beta_ptr = (int8x16_t *)&beta[frame_length<<3]; #endif + for (k=(frame_length>>4)-1; k>=loopval; k--) { #if defined(__x86_64__) || defined(__i386__) - m11_128=((__m128i*)m_11)[k]; - m10_128=((__m128i*)m_10)[k]; + m11_128=((__m128i *)m_11)[k]; + m10_128=((__m128i *)m_10)[k]; m_b0 = _mm_adds_epi8(beta_ptr[4],m11_128); //m11 m_b1 = _mm_subs_epi8(beta_ptr[4],m11_128); //m00 m_b2 = _mm_subs_epi8(beta_ptr[5],m10_128); //m01 @@ -501,7 +483,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho m_b5 = _mm_subs_epi8(beta_ptr[6],m10_128); //m01 m_b6 = _mm_subs_epi8(beta_ptr[7],m11_128); //m00 m_b7 = _mm_adds_epi8(beta_ptr[7],m11_128); //m11 - new0 = _mm_subs_epi8(beta_ptr[0],m11_128); //m00 new1 = _mm_adds_epi8(beta_ptr[0],m11_128); //m11 new2 = _mm_adds_epi8(beta_ptr[1],m10_128); //m10 @@ -510,9 +491,7 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho new5 = _mm_adds_epi8(beta_ptr[2],m10_128); //m10 new6 = _mm_adds_epi8(beta_ptr[3],m11_128); //m11 new7 = _mm_subs_epi8(beta_ptr[3],m11_128); //m00 - beta_ptr-=8; - beta_ptr[0] = _mm_max_epi8(m_b0,new0); beta_ptr[1] = _mm_max_epi8(m_b1,new1); beta_ptr[2] = _mm_max_epi8(m_b2,new2); @@ -521,7 +500,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho beta_ptr[5] = _mm_max_epi8(m_b5,new5); beta_ptr[6] = _mm_max_epi8(m_b6,new6); beta_ptr[7] = _mm_max_epi8(m_b7,new7); - beta_max = 
_mm_max_epi8(beta_ptr[0],beta_ptr[1]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[2]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[3]); @@ -529,7 +507,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho beta_max = _mm_max_epi8(beta_max ,beta_ptr[5]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[6]); beta_max = _mm_max_epi8(beta_max ,beta_ptr[7]); - beta_ptr[0] = _mm_subs_epi8(beta_ptr[0],beta_max); beta_ptr[1] = _mm_subs_epi8(beta_ptr[1],beta_max); beta_ptr[2] = _mm_subs_epi8(beta_ptr[2],beta_max); @@ -539,8 +516,8 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho beta_ptr[6] = _mm_subs_epi8(beta_ptr[6],beta_max); beta_ptr[7] = _mm_subs_epi8(beta_ptr[7],beta_max); #elif defined(__arm__) - m11_128=((int8x16_t*)m_11)[k]; - m10_128=((int8x16_t*)m_10)[k]; + m11_128=((int8x16_t *)m_11)[k]; + m10_128=((int8x16_t *)m_10)[k]; m_b0 = vqaddq_s8(beta_ptr[4],m11_128); //m11 m_b1 = vqsubq_s8(beta_ptr[4],m11_128); //m00 m_b2 = vqsubq_s8(beta_ptr[5],m10_128); //m01 @@ -549,7 +526,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho m_b5 = vqsubq_s8(beta_ptr[6],m10_128); //m01 m_b6 = vqsubq_s8(beta_ptr[7],m11_128); //m00 m_b7 = vqaddq_s8(beta_ptr[7],m11_128); //m11 - new0 = vqsubq_s8(beta_ptr[0],m11_128); //m00 new1 = vqaddq_s8(beta_ptr[0],m11_128); //m11 new2 = vqaddq_s8(beta_ptr[1],m10_128); //m10 @@ -558,9 +534,7 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho new5 = vqaddq_s8(beta_ptr[2],m10_128); //m10 new6 = vqaddq_s8(beta_ptr[3],m11_128); //m11 new7 = vqsubq_s8(beta_ptr[3],m11_128); //m00 - beta_ptr-=8; - beta_ptr[0] = vmaxq_s8(m_b0,new0); beta_ptr[1] = vmaxq_s8(m_b1,new1); beta_ptr[2] = vmaxq_s8(m_b2,new2); @@ -569,7 +543,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho beta_ptr[5] = vmaxq_s8(m_b5,new5); beta_ptr[6] = vmaxq_s8(m_b6,new6); beta_ptr[7] = vmaxq_s8(m_b7,new7); - beta_max = 
vmaxq_s8(beta_ptr[0],beta_ptr[1]); beta_max = vmaxq_s8(beta_max ,beta_ptr[2]); beta_max = vmaxq_s8(beta_max ,beta_ptr[3]); @@ -577,7 +550,6 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho beta_max = vmaxq_s8(beta_max ,beta_ptr[5]); beta_max = vmaxq_s8(beta_max ,beta_ptr[6]); beta_max = vmaxq_s8(beta_max ,beta_ptr[7]); - beta_ptr[0] = vqsubq_s8(beta_ptr[0],beta_max); beta_ptr[1] = vqsubq_s8(beta_ptr[1],beta_max); beta_ptr[2] = vqsubq_s8(beta_ptr[2],beta_max); @@ -592,10 +564,9 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho // Set intial state for next iteration from the last state // as column last states are the first states of the next column // The initial state of column 0 is coming from tail bits (to be computed) - #if defined(__x86_64__) || defined(__i386__) - beta128 = (__m128i*)&beta[0]; - beta_ptr = (__m128i*)&beta[frame_length<<3]; + beta128 = (__m128i *)&beta[0]; + beta_ptr = (__m128i *)&beta[frame_length<<3]; beta_ptr[0] = _mm_srli_si128(beta128[0],1); beta_ptr[1] = _mm_srli_si128(beta128[1],1); beta_ptr[2] = _mm_srli_si128(beta128[2],1); @@ -605,23 +576,29 @@ void compute_beta8(llr_t* alpha,llr_t* beta,llr_t *m_11,llr_t* m_10,unsigned sho beta_ptr[6] = _mm_srli_si128(beta128[6],1); beta_ptr[7] = _mm_srli_si128(beta128[7],1); #elif defined(__arm__) - beta128 = (int8x16_t*)&beta[0]; - beta_ptr = (int8x16_t*)&beta[frame_length<<3]; - beta_ptr[0] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[0],8); beta_ptr[0] = vsetq_lane_s8(beta[7],beta_ptr[0],8); - beta_ptr[1] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[1],8); beta_ptr[1] = vsetq_lane_s8(beta[23],beta_ptr[1],8); - beta_ptr[2] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[2],8); beta_ptr[2] = vsetq_lane_s8(beta[39],beta_ptr[2],8); - beta_ptr[3] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[3],8); beta_ptr[3] = vsetq_lane_s8(beta[55],beta_ptr[3],8); - beta_ptr[4] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[4],8); beta_ptr[4] = 
vsetq_lane_s8(beta[71],beta_ptr[4],8); - beta_ptr[5] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[5],8); beta_ptr[5] = vsetq_lane_s8(beta[87],beta_ptr[5],8); - beta_ptr[6] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[6],8); beta_ptr[6] = vsetq_lane_s8(beta[103],beta_ptr[6],8); - beta_ptr[7] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[7],8); beta_ptr[7] = vsetq_lane_s8(beta[119],beta_ptr[7],8); + beta128 = (int8x16_t *)&beta[0]; + beta_ptr = (int8x16_t *)&beta[frame_length<<3]; + beta_ptr[0] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[0],8); + beta_ptr[0] = vsetq_lane_s8(beta[7],beta_ptr[0],8); + beta_ptr[1] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[1],8); + beta_ptr[1] = vsetq_lane_s8(beta[23],beta_ptr[1],8); + beta_ptr[2] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[2],8); + beta_ptr[2] = vsetq_lane_s8(beta[39],beta_ptr[2],8); + beta_ptr[3] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[3],8); + beta_ptr[3] = vsetq_lane_s8(beta[55],beta_ptr[3],8); + beta_ptr[4] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[4],8); + beta_ptr[4] = vsetq_lane_s8(beta[71],beta_ptr[4],8); + beta_ptr[5] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[5],8); + beta_ptr[5] = vsetq_lane_s8(beta[87],beta_ptr[5],8); + beta_ptr[6] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[6],8); + beta_ptr[6] = vsetq_lane_s8(beta[103],beta_ptr[6],8); + beta_ptr[7] = (int8x16_t)vshrq_n_s64((int64x2_t)beta128[7],8); + beta_ptr[7] = vsetq_lane_s8(beta[119],beta_ptr[7],8); #endif } } -void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, llr_t* systematic,unsigned short frame_length) -{ - +void compute_ext8(llr_t *alpha,llr_t *beta,llr_t *m_11,llr_t *m_10,llr_t *ext, llr_t *systematic,unsigned short frame_length) { #if defined(__x86_64__) || defined(__i386__) __m128i *alpha128=(__m128i *)alpha; __m128i *beta128=(__m128i *)beta; @@ -642,27 +619,20 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l int8x16_t m11_1,m11_2,m11_3,m11_4; #endif int k; - // // LLR 
computation, 8 consequtive bits per loop // - #ifdef DEBUG_LOGMAP printf("compute_ext, %p, %p, %p, %p, %p, %p ,framelength %d\n",alpha,beta,m_11,m_10,ext,systematic,frame_length); #endif - alpha_ptr = alpha128; beta_ptr = &beta128[8]; - for (k=0; k<(frame_length>>4); k++) { - #if defined(__x86_64__) || defined(__i386__) - - m11_128 = (__m128i*)&m_11[k<<4]; - m10_128 = (__m128i*)&m_10[k<<4]; - ext_128 = (__m128i*)&ext[k<<4]; - + m11_128 = (__m128i *)&m_11[k<<4]; + m10_128 = (__m128i *)&m_10[k<<4]; + ext_128 = (__m128i *)&ext[k<<4]; m00_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m11_4 = _mm_adds_epi8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m00_3 = _mm_adds_epi8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; @@ -679,7 +649,6 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l m10_2 = _mm_adds_epi8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m01_1 = _mm_adds_epi8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; - m01_1 = _mm_max_epi8(m01_1,m01_2); m01_1 = _mm_max_epi8(m01_1,m01_3); m01_1 = _mm_max_epi8(m01_1,m01_4); @@ -692,28 +661,19 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l m11_1 = _mm_max_epi8(m11_1,m11_2); m11_1 = _mm_max_epi8(m11_1,m11_3); m11_1 = _mm_max_epi8(m11_1,m11_4); - - m01_1 = _mm_subs_epi8(m01_1,*m10_128); m00_1 = _mm_subs_epi8(m00_1,*m11_128); m10_1 = _mm_adds_epi8(m10_1,*m10_128); m11_1 = _mm_adds_epi8(m11_1,*m11_128); - - m01_1 = _mm_max_epi8(m01_1,m00_1); m10_1 = _mm_max_epi8(m10_1,m11_1); - - *ext_128 = _mm_subs_epi8(m10_1,m01_1); - alpha_ptr+=8; beta_ptr+=8; #elif defined(__arm__) - - m11_128 = (int8x16_t*)&m_11[k<<4]; - m10_128 = (int8x16_t*)&m_10[k<<4]; - ext_128 = (int8x16_t*)&ext[k<<4]; - + m11_128 = (int8x16_t *)&m_11[k<<4]; + m10_128 = (int8x16_t *)&m_10[k<<4]; + ext_128 = (int8x16_t *)&ext[k<<4]; m00_4 = vqaddq_s8(alpha_ptr[7],beta_ptr[3]); //ALPHA_BETA_4m00; m11_4 = 
vqaddq_s8(alpha_ptr[7],beta_ptr[7]); //ALPHA_BETA_4m11; m00_3 = vqaddq_s8(alpha_ptr[6],beta_ptr[7]); //ALPHA_BETA_3m00; @@ -730,7 +690,6 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l m10_2 = vqaddq_s8(alpha_ptr[3],beta_ptr[5]); //ALPHA_BETA_2m10; m10_1 = vqaddq_s8(alpha_ptr[2],beta_ptr[1]); //ALPHA_BETA_1m10; m01_1 = vqaddq_s8(alpha_ptr[2],beta_ptr[5]); //ALPHA_BETA_1m01; - m01_1 = vmaxq_s8(m01_1,m01_2); m01_1 = vmaxq_s8(m01_1,m01_3); m01_1 = vmaxq_s8(m01_1,m01_4); @@ -743,27 +702,17 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l m11_1 = vmaxq_s8(m11_1,m11_2); m11_1 = vmaxq_s8(m11_1,m11_3); m11_1 = vmaxq_s8(m11_1,m11_4); - - m01_1 = vqsubq_s8(m01_1,*m10_128); m00_1 = vqsubq_s8(m00_1,*m11_128); m10_1 = vqaddq_s8(m10_1,*m10_128); m11_1 = vqaddq_s8(m11_1,*m11_128); - - m01_1 = vmaxq_s8(m01_1,m00_1); m10_1 = vmaxq_s8(m10_1,m11_1); - - *ext_128 = vqsubq_s8(m10_1,m01_1); - alpha_ptr+=8; beta_ptr+=8; - #endif } - - } @@ -771,8 +720,7 @@ void compute_ext8(llr_t* alpha,llr_t* beta,llr_t* m_11,llr_t* m_10,llr_t* ext, l //int pi2[n],pi3[n+8],pi5[n+8],pi4[n+8],pi6[n+8], int *pi2tab8[188],*pi5tab8[188],*pi4tab8[188],*pi6tab8[188]; -void free_td8(void) -{ +void free_td8(void) { int ind; for (ind=0; ind<188; ind++) { @@ -787,14 +735,11 @@ void free_td8(void) extern RAN_CONTEXT_t RC; -void init_td8(void) -{ - +void init_td8(void) { int ind,i,j,n,n2,pi,pi3; - short * base_interleaver; + short *base_interleaver; for (ind=0; ind<188; ind++) { - n = f1f2mat[ind].nb_bits; base_interleaver=il_tb+f1f2mat[ind].beg_index; #ifdef MEX @@ -816,68 +761,57 @@ void init_td8(void) n2 = n; for (j=0,i=0; i<n2; i++,j+=16) { - if (j>=n2) j-=(n2-1); pi2tab8[ind][i] = j; // printf("pi2[%d] = %d\n",i,j); } - + for (i=0; i<n2; i++) { pi = base_interleaver[i];//(unsigned int)threegpplte_interleaver(f1,f2,n); pi3 = pi2tab8[ind][pi]; pi4tab8[ind][pi2tab8[ind][i]] = pi3; pi5tab8[ind][pi3] = pi2tab8[ind][i]; pi6tab8[ind][pi] = 
pi2tab8[ind][i]; - } - + } } } uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, - int16_t *y2, - uint8_t *decoded_bytes, - uint8_t *decoded_bytes2, - uint16_t n, - uint8_t max_iterations, - uint8_t crc_type, - uint8_t F, - time_stats_t *init_stats, - time_stats_t *alpha_stats, - time_stats_t *beta_stats, - time_stats_t *gamma_stats, - time_stats_t *ext_stats, - time_stats_t *intl1_stats, - time_stats_t *intl2_stats) { + int16_t *y2, + uint8_t *decoded_bytes, + uint8_t *decoded_bytes2, + uint16_t n, + uint8_t max_iterations, + uint8_t crc_type, + uint8_t F, + time_stats_t *init_stats, + time_stats_t *alpha_stats, + time_stats_t *beta_stats, + time_stats_t *gamma_stats, + time_stats_t *ext_stats, + time_stats_t *intl1_stats, + time_stats_t *intl2_stats) { /* y is a pointer to the input decoded_bytes is a pointer to the decoded output n is the size in bits of the coded block, with the tail */ - int n2; - llr_t y8[3*(n+16)] __attribute__((aligned(16))); - - llr_t systematic0[n+16] __attribute__ ((aligned(16))); llr_t systematic1[n+16] __attribute__ ((aligned(16))); llr_t systematic2[n+16] __attribute__ ((aligned(16))); llr_t yparity1[n+16] __attribute__ ((aligned(16))); llr_t yparity2[n+16] __attribute__ ((aligned(16))); - llr_t ext[n+128] __attribute__((aligned(16))); llr_t ext2[n+128] __attribute__((aligned(16))); - llr_t alpha[(n+16)*8] __attribute__ ((aligned(16))); llr_t beta[(n+16)*8] __attribute__ ((aligned(16))); llr_t m11[n+16] __attribute__ ((aligned(16))); llr_t m10[n+16] __attribute__ ((aligned(16))); - - // int *pi2_p,*pi4_p,*pi5_p,*pi6_p; int *pi4_p,*pi5_p,*pi6_p; llr_t *s,*s1,*s2,*yp1,*yp2,*yp; - unsigned int i,j,iind;//,pi; unsigned char iteration_cnt=0; unsigned int crc,oldcrc,crc_len; @@ -890,13 +824,11 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, int8x16_t *yp128; int8x16_t tmp128[(n+8)>>3]; int8x16_t tmp, zeros=vdupq_n_s8(0); - const uint8_t __attribute__ ((aligned (16))) _Powers[16]= - { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 
64, 128 }; - + const uint8_t __attribute__ ((aligned (16))) _Powers[16]= + { 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128 }; // Set the powers of 2 (do it once for all, if applicable) uint8x16_t Powers= vld1q_u8(_Powers); #endif - int offset8_flag=0; if (crc_type > 3) { @@ -904,17 +836,14 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, return 255; } - if (init_stats) start_meas(init_stats); - if ((n&15)>0) { n2 = n+8; offset8_flag=1; } else n2 = n; - for (iind=0; iind < 188 && f1f2mat[iind].nb_bits != n; iind++); if ( iind == 188 ) { @@ -923,31 +852,30 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, } switch (crc_type) { - case CRC24_A: - case CRC24_B: - crc_len=3; - break; + case CRC24_A: + case CRC24_B: + crc_len=3; + break; - case CRC16: - crc_len=2; - break; + case CRC16: + crc_len=2; + break; - case CRC8: - crc_len=1; - break; + case CRC8: + crc_len=1; + break; - default: - crc_len=3; + default: + crc_len=3; } #if defined(__x86_64__) || defined(__i386__) - // note: this makes valgrind freak __m128i avg=_mm_set1_epi32(0); for (i=0; i<(3*(n>>4))+1; i++) { - __m128i tmp=_mm_abs_epi16(_mm_unpackhi_epi16(((__m128i*)y)[i],((__m128i*)y)[i])); - avg=_mm_add_epi32(_mm_cvtepi16_epi32(_mm_abs_epi16(((__m128i*)y)[i])),avg); + __m128i tmp=_mm_abs_epi16(_mm_unpackhi_epi16(((__m128i *)y)[i],((__m128i *)y)[i])); + avg=_mm_add_epi32(_mm_cvtepi16_epi32(_mm_abs_epi16(((__m128i *)y)[i])),avg); avg=_mm_add_epi32(_mm_cvtepi16_epi32(tmp),avg); } @@ -971,15 +899,13 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, for (i=0,j=0; i<(3*(n2>>4))+1; i++,j+=2) ((__m128i *)y8)[i] = _mm_packs_epi16(_mm_srai_epi16(((__m128i *)y)[j],3),_mm_srai_epi16(((__m128i *)y)[j+1],4)); - yp128 = (__m128i*)y8; - + yp128 = (__m128i *)y8; #elif defined(__arm__) - int32x4_t avg=vdupq_n_s32(0); for (i=0; i<(3*(n>>4))+1; i++) { - int16x8_t tmp=vabsq_s16(((int16x8_t*)y)[i]); - avg = vqaddq_s32(avg,vaddl_s16(((int16x4_t*)&tmp)[0],((int16x4_t*)&tmp)[1])); + int16x8_t 
tmp=vabsq_s16(((int16x8_t *)y)[i]); + avg = vqaddq_s32(avg,vaddl_s16(((int16x4_t *)&tmp)[0],((int16x4_t *)&tmp)[1])); } int32_t round_avg=(vgetq_lane_s32(avg,0)+vgetq_lane_s32(avg,1)+vgetq_lane_s32(avg,2)+vgetq_lane_s32(avg,3))/(n*3); @@ -999,10 +925,8 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, for (i=0,j=0; i<(3*(n2>>3))+1; i++,j+=2) ((int8x8_t *)y8)[i] = vqmovn_s16(vshrq_n_s16(((int16x8_t *)y)[j],3)); - yp128 = (int8x16_t*)y8; - + yp128 = (int8x16_t *)y8; #endif - s = systematic0; s1 = systematic1; s2 = systematic2; @@ -1020,8 +944,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, } #endif - - yp=(llr_t*)yp128; + yp=(llr_t *)yp128; if (n2>n) { /* @@ -1031,7 +954,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, s1[n+4]=0;s1[n+5]=0;s1[n+6]=0;s1[n+7]=0; s2[n]=0;s2[n+1]=0;s2[n+2]=0;s2[n+3]=0; s2[n+4]=0;s2[n+5]=0;s2[n+6]=0;s2[n+7]=0;*/ - yp=(llr_t*)(y8+n); + yp=(llr_t *)(y8+n); } // printf("n=%d,n2=%d\n",n,n2); @@ -1045,7 +968,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, yp1[i] = *yp; yp++; #ifdef DEBUG_LOGMAP - printf("Term 1 (%d): %d %d\n",i,s[i],yp1[i]); + printf("Term 1 (%u): %d %d\n",i,s[i],yp1[i]); #endif //DEBUG_LOGMAP } @@ -1057,7 +980,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, yp2[i-16] = *yp; yp++; #ifdef DEBUG_LOGMAP - printf("Term 2 (%d): %d %d\n",i-16,s[i],yp2[i-16]); + printf("Term 2 (%u): %d %d\n",i-16,s[i],yp2[i-16]); #endif //DEBUG_LOGMAP } @@ -1068,63 +991,59 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, if (init_stats) stop_meas(init_stats); // do log_map from first parity bit - log_map8(systematic0,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); - while (iteration_cnt++ < max_iterations) { + while (iteration_cnt++ < max_iterations) { #ifdef DEBUG_LOGMAP printf("\n*******************ITERATION %d (n %d, n2 %d), ext %p\n\n",iteration_cnt,n,n2,ext); #endif //DEBUG_LOGMAP if (intl1_stats) start_meas(intl1_stats); + pi4_p=pi4tab8[iind]; for (i=0; 
i<(n2>>4); i++) { // steady-state portion #if defined(__x86_64__) || defined(__i386__) - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],0); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],1); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],2); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],3); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],4); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],5); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],6); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],7); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],8); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],9); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],10); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],11); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],12); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],13); - tmp=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],14); - ((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t*)ext)[*pi4_p++],15); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],0); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],1); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],2); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],3); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],4); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],5); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],6); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],7); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],8); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],9); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],10); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],11); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],12); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],13); + tmp=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],14); + ((__m128i *)systematic2)[i]=_mm_insert_epi8(tmp,((llr_t *)ext)[*pi4_p++],15); #elif defined(__arm__) - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,0); - 
tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,1); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,2); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,3); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,4); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,5); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,6); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,7); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,8); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,9); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,10); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,11); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,12); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,13); - tmp=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,14); - ((int8x16_t *)systematic2)[i]=vsetq_lane_s8(((llr_t*)ext)[*pi4_p++],tmp,15); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,0); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,1); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,2); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,3); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,4); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,5); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,6); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,7); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,8); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,9); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,10); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,11); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,12); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,13); + tmp=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,14); + ((int8x16_t *)systematic2)[i]=vsetq_lane_s8(((llr_t *)ext)[*pi4_p++],tmp,15); #endif } if (intl1_stats) stop_meas(intl1_stats); // do log_map from second parity bit - log_map8(systematic2,yparity2,m11,m10,alpha,beta,ext2,n2,1,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); - - - pi5_p=pi5tab8[iind]; uint16_t decoded_bytes_interl[6144/16] __attribute__((aligned(16))); @@ 
-1148,7 +1067,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],14); tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15); decoded_bytes_interl[i]=(uint16_t) _mm_movemask_epi8(_mm_cmpgt_epi8(tmp,zeros)); - ((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]); + ((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]); #elif defined(__arm__) tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,0); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,1); @@ -1166,13 +1085,12 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,13); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,14); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,15); - uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(vcgtq_s8(tmp,zeros), Powers)))); - vst1q_lane_u8(&((uint8_t*)&decoded_bytes[i])[0], (uint8x16_t)Mask, 0); - vst1q_lane_u8(&((uint8_t*)&decoded_bytes[i])[1], (uint8x16_t)Mask, 8); - ((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t*)ext)[i]),((int8x16_t *)systematic0)[i]); + uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(vcgtq_s8(tmp,zeros), Powers)))); + vst1q_lane_u8(&((uint8_t *)&decoded_bytes[i])[0], (uint8x16_t)Mask, 0); + vst1q_lane_u8(&((uint8_t *)&decoded_bytes[i])[1], (uint8x16_t)Mask, 8); + ((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t *)ext)[i]),((int8x16_t *)systematic0)[i]); #endif } - } else { for (i=0; i<(n2>>4); i++) { #if defined(__x86_64__) || defined(__i386__) @@ -1193,8 +1111,7 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],14); tmp=_mm_insert_epi8(tmp,ext2[*pi5_p++],15); tmp128[i] = _mm_adds_epi8(((__m128i *)ext2)[i],((__m128i *)systematic2)[i]); - - ((__m128i *)systematic1)[i] = _mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i*)ext)[i]),((__m128i *)systematic0)[i]); + ((__m128i *)systematic1)[i] = 
_mm_adds_epi8(_mm_subs_epi8(tmp,((__m128i *)ext)[i]),((__m128i *)systematic0)[i]); #elif defined(__arm__) tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,0); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,1); @@ -1213,11 +1130,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,14); tmp=vsetq_lane_s8(ext2[*pi5_p++],tmp,15); tmp128[i] = vqaddq_s8(((int8x16_t *)ext2)[i],((int8x16_t *)systematic2)[i]); - - ((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t*)ext)[i]),((int8x16_t *)systematic0)[i]); - -#endif - } + ((int8x16_t *)systematic1)[i] = vqaddq_s8(vqsubq_s8(tmp,((int8x16_t *)ext)[i]),((int8x16_t *)systematic0)[i]); +#endif + } } // Check if we decoded the block @@ -1225,11 +1140,10 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, if (intl2_stats) start_meas(intl2_stats); if ((n2&0x7f) == 0) { // n2 is a multiple of 128 bits - // re-order the decoded bits in theregular order // as it is presently ordered as 16 sequential columns #if defined(__x86_64__) || defined(__i386__) - __m128i* dbytes=(__m128i*)decoded_bytes_interl; + __m128i *dbytes=(__m128i *)decoded_bytes_interl; __m128i shuffle=SHUFFLE16(7,6,5,4,3,2,1,0); __m128i mask __attribute__((aligned(16))); int n_128=n2>>7; @@ -1239,10 +1153,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, __m128i tmp __attribute__((aligned(16))); tmp=_mm_shuffle_epi8(dbytes[i],shuffle); __m128i tmp2 __attribute__((aligned(16))) ; - tmp2=_mm_and_si128(tmp,mask); tmp2=_mm_cmpeq_epi16(tmp2,mask); - // printf("decoded_bytes %p\n",decoded_bytes); + // printf("decoded_bytes %p\n",decoded_bytes); decoded_bytes[n_128*0+i]=(uint8_t) _mm_movemask_epi8(_mm_packs_epi16(tmp2,zeros)); int j; @@ -1253,22 +1166,22 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, decoded_bytes[n_128*j +i]=(uint8_t) _mm_movemask_epi8(_mm_packs_epi16(tmp2,zeros)); } } + #elif defined(__arm__) - uint8x16_t* dbytes=(uint8x16_t*)decoded_bytes_interl; + uint8x16_t *dbytes=(uint8x16_t *)decoded_bytes_interl; 
uint16x8_t mask __attribute__((aligned(16))); int n_128=n2>>7; for (i=0; i<n_128; i++) { mask=vdupq_n_u16(1); uint8x16_t tmp __attribute__((aligned(16))); - tmp=vcombine_u8(vrev64_u8(((uint8x8_t*)&dbytes[i])[1]),vrev64_u8(((uint8x8_t*)&dbytes[i])[0])); + tmp=vcombine_u8(vrev64_u8(((uint8x8_t *)&dbytes[i])[1]),vrev64_u8(((uint8x8_t *)&dbytes[i])[0])); vst1q_lane_u8(&decoded_bytes[n_128*0+i],(uint8x16_t)vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))),0); - int j; for (j=1; j<16; j++) { mask=vshlq_n_u16(mask,1); - vst1q_lane_u8(&decoded_bytes[n_128*0+i],(uint8x16_t)vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))),0); + vst1q_lane_u8(&decoded_bytes[n_128*0+i],(uint8x16_t)vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(tmp, Powers)))),0); } } @@ -1313,9 +1226,9 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,10); tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,9); tmp=vsetq_lane_s8(((llr_t *)tmp128)[*pi6_p++],tmp,8); - uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(vcgtq_s8(tmp,zeros), Powers)))); - vst1q_lane_u8(&((uint8_t*)&decoded_bytes[i])[0], (uint8x16_t)Mask, 0); - vst1q_lane_u8(&((uint8_t*)&decoded_bytes[i])[1], (uint8x16_t)Mask, 8); + uint64x2_t Mask= vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(vandq_u8(vcgtq_s8(tmp,zeros), Powers)))); + vst1q_lane_u8(&((uint8_t *)&decoded_bytes[i])[0], (uint8x16_t)Mask, 0); + vst1q_lane_u8(&((uint8_t *)&decoded_bytes[i])[1], (uint8x16_t)Mask, 8); #endif } } @@ -1324,41 +1237,40 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, oldcrc= *((unsigned int *)(&decoded_bytes[(n>>3)-crc_len])); switch (crc_type) { - - case CRC24_A: - oldcrc&=0x00ffffff; - crc = crc24a(&decoded_bytes[F>>3], - n-24-F)>>8; - temp=((uint8_t *)&crc)[2]; - ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; - ((uint8_t *)&crc)[0] = temp; - break; - - case CRC24_B: - oldcrc&=0x00ffffff; - crc = crc24b(decoded_bytes, - n-24)>>8; - temp=((uint8_t *)&crc)[2]; - ((uint8_t *)&crc)[2] = 
((uint8_t *)&crc)[0]; - ((uint8_t *)&crc)[0] = temp; - break; - - case CRC16: - oldcrc&=0x0000ffff; - crc = crc16(decoded_bytes, - n-16)>>16; - break; - - case CRC8: - oldcrc&=0x000000ff; - crc = crc8(decoded_bytes, - n-8)>>24; - break; - - default: - printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); - return(255); - break; + case CRC24_A: + oldcrc&=0x00ffffff; + crc = crc24a(&decoded_bytes[F>>3], + n-24-F)>>8; + temp=((uint8_t *)&crc)[2]; + ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; + ((uint8_t *)&crc)[0] = temp; + break; + + case CRC24_B: + oldcrc&=0x00ffffff; + crc = crc24b(decoded_bytes, + n-24)>>8; + temp=((uint8_t *)&crc)[2]; + ((uint8_t *)&crc)[2] = ((uint8_t *)&crc)[0]; + ((uint8_t *)&crc)[0] = temp; + break; + + case CRC16: + oldcrc&=0x0000ffff; + crc = crc16(decoded_bytes, + n-16)>>16; + break; + + case CRC8: + oldcrc&=0x000000ff; + crc = crc8(decoded_bytes, + n-8)>>24; + break; + + default: + printf("FATAL: 3gpplte_turbo_decoder_sse.c: Unknown CRC\n"); + return(255); + break; } if (intl2_stats) stop_meas(intl2_stats); @@ -1372,13 +1284,13 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, if (iteration_cnt < max_iterations) { log_map8(systematic1,yparity1,m11,m10,alpha,beta,ext,n2,0,F,offset8_flag,alpha_stats,beta_stats,gamma_stats,ext_stats); #if defined(__x86_64__) || defined(__i386__) - __m128i* ext_128=(__m128i*) ext; - __m128i* s1_128=(__m128i*) systematic1; - __m128i* s0_128=(__m128i*) systematic0; + __m128i *ext_128=(__m128i *) ext; + __m128i *s1_128=(__m128i *) systematic1; + __m128i *s0_128=(__m128i *) systematic0; #elif defined(__arm__) - int8x16_t* ext_128=(int8x16_t*) ext; - int8x16_t* s1_128=(int8x16_t*) systematic1; - int8x16_t* s0_128=(int8x16_t*) systematic0; + int8x16_t *ext_128=(int8x16_t *) ext; + int8x16_t *s1_128=(int8x16_t *) systematic1; + int8x16_t *s0_128=(int8x16_t *) systematic0; #endif int myloop=n2>>4; @@ -1394,5 +1306,4 @@ uint8_t phy_threegpplte_turbo_decoder8(int16_t *y, } return(iteration_cnt); - } diff 
--git a/openair1/PHY/CODING/ccoding_byte.c b/openair1/PHY/CODING/ccoding_byte.c index db91e68dc8f..e4617d1404c 100644 --- a/openair1/PHY/CODING/ccoding_byte.c +++ b/openair1/PHY/CODING/ccoding_byte.c @@ -47,27 +47,20 @@ void ccodedot11_encode (unsigned int numbytes, unsigned char *inPtr, unsigned char *outPtr, - unsigned char puncturing) -{ + unsigned char puncturing) { unsigned int state; - unsigned char c, out, shiftbit =0; - // printf("In ccodedot11_encode (%d,%p,%p,%d)\n",numbytes,inPtr,outPtr,puncturing); - #ifdef DEBUG_CCODE unsigned int dummy; #endif //DEBUG_CCODE int bit_index; - /* The input bit is shifted in position 8 of the state. Shiftbit will take values between 1 and 8 */ state = 0; - #ifdef DEBUG_CCODE dummy = 0; #endif //DEBUG_CCODE - /* Do not increment inPtr until we read the next octet */ bit_index=0; @@ -78,85 +71,75 @@ ccodedot11_encode (unsigned int numbytes, #endif //DEBUG_CCODE switch (puncturing) { - case 0: //rate 1/2 - for (shiftbit = 0; shiftbit<8; shiftbit++) { - - state >>= 1; - - if ((c&(1<<shiftbit)) != 0) { - state |= 64; - } + case 0: //rate 1/2 + for (shiftbit = 0; shiftbit<8; shiftbit++) { + state >>= 1; - out = ccodedot11_table[state]; - - *outPtr++ = out & 1; - *outPtr++ = (out>>1)&1; + if ((c&(1<<shiftbit)) != 0) { + state |= 64; + } + out = ccodedot11_table[state]; + *outPtr++ = out & 1; + *outPtr++ = (out>>1)&1; #ifdef DEBUG_CCODE - printf("%d: %d -> %d (%d)\n",dummy,state,out,ccodedot11_table[state]); - dummy+=2; + printf("%u: %u -> %d (%u)\n",dummy,state,out,ccodedot11_table[state]); + dummy+=2; #endif //DEBUG_CCODE + } - } - - break; - - case 1: // rate 3/4 - for (shiftbit = 0; shiftbit<8; shiftbit++) { + break; - state >>= 1; + case 1: // rate 3/4 + for (shiftbit = 0; shiftbit<8; shiftbit++) { + state >>= 1; - if ((c&(1<<shiftbit)) != 0) { - state |= 64; - } + if ((c&(1<<shiftbit)) != 0) { + state |= 64; + } - out = ccodedot11_table[state]; + out = ccodedot11_table[state]; - if (bit_index<2) - *outPtr++ = out & 1; + if 
(bit_index<2) + *outPtr++ = out & 1; - if (bit_index!=1) - *outPtr++ = (out>>1)&1; + if (bit_index!=1) + *outPtr++ = (out>>1)&1; #ifdef DEBUG_CCODE - printf("%d: %d -> %d (%d)\n",dummy,state,out,ccodedot11_table[state]); - dummy+=2; + printf("%u: %u -> %d (%u)\n",dummy,state,out,ccodedot11_table[state]); + dummy+=2; #endif //DEBUG_CCODE + bit_index=(bit_index==2)?0:(bit_index+1); + } - bit_index=(bit_index==2)?0:(bit_index+1); - } - - break; - - case 2: // rate 2/3 - for (shiftbit = 0; shiftbit<8; shiftbit++) { - - state >>= 1; + break; - if ((c&(1<<shiftbit)) != 0) { - state |= 64; - } + case 2: // rate 2/3 + for (shiftbit = 0; shiftbit<8; shiftbit++) { + state >>= 1; - out = ccodedot11_table[state]; + if ((c&(1<<shiftbit)) != 0) { + state |= 64; + } - *outPtr++ = out & 1; + out = ccodedot11_table[state]; + *outPtr++ = out & 1; - if (bit_index==0) - *outPtr++ = (out>>1)&1; + if (bit_index==0) + *outPtr++ = (out>>1)&1; #ifdef DEBUG_CCODE - printf("%d: %d -> %d (%d)\n",dummy,state,out,ccodedot11_table[state]); - dummy+=2; + printf("%d: %u -> %d (%u)\n",dummy,state,out,ccodedot11_table[state]); + dummy+=2; #endif //DEBUG_CCODE + bit_index=(bit_index==0)?1:0; + } - bit_index=(bit_index==0)?1:0; - - } - - break; + break; - default: - break; + default: + break; } } @@ -181,8 +164,6 @@ ccodedot11_encode (unsigned int numbytes, } */ - - } @@ -197,8 +178,7 @@ ccodedot11_encode (unsigned int numbytes, /* Basic code table initialization for constraint length 7 */ /* Input in MSB, followed by state in 6 LSBs */ -void ccodedot11_init(void) -{ +void ccodedot11_init(void) { unsigned int i, j, k, sum; for (i = 0; i < 128; i++) { @@ -219,8 +199,7 @@ void ccodedot11_init(void) } /* Input in LSB, followed by state in 6 MSBs */ -void ccodedot11_init_inv(void) -{ +void ccodedot11_init_inv(void) { unsigned int i, j, k, sum; for (i = 0; i < 128; i++) { @@ -251,21 +230,15 @@ void ccodedot11_init_inv(void) #ifdef DEBUG_CCODE #include <stdio.h> -main() -{ +main() { unsigned char test[] = 
"0Thebigredfox"; unsigned char output[512], *inPtr, *outPtr; unsigned int i; - test[0] = 128; test[1] = 0; - - ccodedot11_init(); - inPtr = test; outPtr = output; - ccodedot11_encode(16, inPtr, outPtr,0); for (i = 0; i < 32; i++) printf("%x ", output[i]); diff --git a/openair1/PHY/CODING/ccoding_byte_lte.c b/openair1/PHY/CODING/ccoding_byte_lte.c index 870e9ba4700..f41b073a78d 100644 --- a/openair1/PHY/CODING/ccoding_byte_lte.c +++ b/openair1/PHY/CODING/ccoding_byte_lte.c @@ -52,20 +52,16 @@ ccodelte_encode (int32_t numbits, uint8_t add_crc, uint8_t *inPtr, uint8_t *outPtr, - uint16_t rnti) -{ + uint16_t rnti) { uint32_t state; - uint8_t c, out, first_bit; int8_t shiftbit=0; uint16_t c16; uint16_t next_last_byte=0; uint32_t crc=0; - #ifdef DEBUG_CCODE uint32_t dummy=0; #endif //DEBUG_CCODE - /* The input bit is shifted in position 8 of the state. Shiftbit will take values between 1 and 8 */ state = 0; @@ -137,17 +133,12 @@ ccodelte_encode (int32_t numbits, #endif //DEBUG_CCODE /* Do not increment inPtr until we read the next octet */ - - - while (numbits > 0) { - c = *inPtr++; #ifdef DEBUG_CCODE printf("** %x **\n",c); #endif //DEBUG_CCODE - // for (shiftbit = 0; (shiftbit<8) && (numbits>0);shiftbit++,numbits--) { for (shiftbit = 7; (shiftbit>=0) && (numbits>0); shiftbit--,numbits--) { state >>= 1; @@ -157,23 +148,18 @@ ccodelte_encode (int32_t numbits, } out = ccodelte_table[state]; - *outPtr++ = out & 1; *outPtr++ = (out>>1)&1; *outPtr++ = (out>>2)&1; - #ifdef DEBUG_CCODE printf("numbits %d, input %d, outbit %d: %d -> %d (%d%d%d)\n",numbits,state>>6,dummy,state,out,out&1,(out>>1)&1,(out>>2)&1); dummy+=3; #endif //DEBUG_CCODE - } - } // now code 8-bit CRC for UCI if (add_crc == 1) { - c = (uint8_t)(crc>>24); // for (shiftbit = 0; (shiftbit<8);shiftbit++) { @@ -185,22 +171,18 @@ ccodelte_encode (int32_t numbits, } out = ccodelte_table[state]; - *outPtr++ = out & 1; *outPtr++ = (out>>1)&1; *outPtr++ = (out>>2)&1; - #ifdef DEBUG_CCODE - printf("crc bit %d input %d, 
outbit %d: %d -> %d (%d)\n",shiftbit,state>>6,dummy,state,out,ccodelte_table[state]); + printf("crc bit %d input %d, outbit %d: %d -> %d (%u)\n",shiftbit,state>>6,dummy,state,out,ccodelte_table[state]); dummy+=3; #endif //DEBUG_CCODE - } } // now code 16-bit CRC for DCI if (add_crc == 2) { - c16 = (uint16_t)(crc>>16); // for (shiftbit = 0; (shiftbit<16);shiftbit++) { @@ -212,16 +194,13 @@ ccodelte_encode (int32_t numbits, } out = ccodelte_table[state]; - *outPtr++ = out & 1; *outPtr++ = (out>>1)&1; *outPtr++ = (out>>2)&1; - #ifdef DEBUG_CCODE - printf("crc bit %d input %d, outbit %d: %d -> %d (%d)\n",shiftbit,state>>6,dummy,state,out,ccodelte_table[state]); + printf("crc bit %d input %d, outbit %d: %d -> %d (%u)\n",shiftbit,state>>6,dummy,state,out,ccodelte_table[state]); dummy+=3; #endif //DEBUG_CCODE - } } } @@ -238,8 +217,7 @@ ccodelte_encode (int32_t numbits, /* Basic code table initialization for constraint length 7 */ /* Input in MSB, followed by state in 6 LSBs */ -void ccodelte_init(void) -{ +void ccodelte_init(void) { unsigned int i, j, k, sum; for (i = 0; i < 128; i++) { @@ -260,8 +238,7 @@ void ccodelte_init(void) } /* Input in LSB, followed by state in 6 MSBs */ -void ccodelte_init_inv(void) -{ +void ccodelte_init_inv(void) { unsigned int i, j, k, sum; for (i = 0; i < 128; i++) { @@ -281,8 +258,7 @@ void ccodelte_init_inv(void) } } -void ccodedab_init(void) -{ +void ccodedab_init(void) { unsigned int i, j, k, sum; for (i = 0; i < 128; i++) { @@ -303,8 +279,7 @@ void ccodedab_init(void) } /* Input in LSB, followed by state in 6 MSBs */ -void ccodedab_init_inv(void) -{ +void ccodedab_init_inv(void) { unsigned int i, j, k, sum; for (i = 0; i < 128; i++) { @@ -334,21 +309,15 @@ void ccodedab_init_inv(void) #ifdef CCODE_MAIN #include <stdio.h> -main() -{ +main() { unsigned char test[] = "Thebigredfox"; unsigned char output[512], *inPtr, *outPtr; unsigned int i; - test[0] = 128; test[1] = 0; - - ccodelte_init(); - inPtr = test; outPtr = output; - 
ccodelte_encode(21, inPtr, outPtr); for (i = 0; i < 21*3; i++) printf("%x ", output[i]); diff --git a/openair1/PHY/CODING/lte_rate_matching.c b/openair1/PHY/CODING/lte_rate_matching.c index af5d1a169e9..faa763b5d38 100644 --- a/openair1/PHY/CODING/lte_rate_matching.c +++ b/openair1/PHY/CODING/lte_rate_matching.c @@ -25,8 +25,8 @@ date: 21.10.2009 */ #ifdef MAIN -#include <stdio.h> -#include <stdlib.h> + #include <stdio.h> + #include <stdlib.h> #endif #include "PHY/defs_eNB.h" #include "PHY/LTE_TRANSPORT/transport_common.h" @@ -42,9 +42,7 @@ static uint32_t bitrev_cc[32] = {1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31,0,16 //#define RM_DEBUG2 1 //#define RM_DEBUG_CC 1 -uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) -{ - +uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) { uint32_t RTC = (D>>5), ND, ND3; uint32_t row,col,Kpi; uint32_t index3,k,k2; @@ -64,7 +62,6 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) printf("RTC = %d, Kpi=%d, ND=%d\n",RTC,Kpi,ND); #endif ND3 = ND*3; - // copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212 d[(3*D)+2] = d[2]; k=0; @@ -80,12 +77,9 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) index3 = bitrev_x3[col];//3*index; for (row=0; row<RTC; row++) { - w[k] = d1[index3];//d[index3-ND3]; w[Kpi+k2] = d2[index3];//d[index3-ND3+1]; w[Kpi+1+k2] = d3[index3];//d[index3-ND3+5]; - - #ifdef RM_DEBUG printf("row %d, index %d, index-Nd %d index-Nd+1 %d (k,Kpi+2k,Kpi+2k+1) (%d,%d,%d) w(%d,%d,%d)\n",row,index,index-ND,((index+1)%Kpi)-ND,k,Kpi+(k<<1),Kpi+(k<<1)+1,w[k],w[Kpi+(k<<1)],w[Kpi+1+(k<<1)]); @@ -100,7 +94,8 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) #endif index3+=96; - k++;k2+=2; + k++; + k2+=2; } } @@ -120,9 +115,7 @@ uint32_t sub_block_interleaving_turbo(uint32_t D, uint8_t *d,uint8_t *w) } -uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w) -{ - +uint32_t 
sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w) { uint32_t RCC = (D>>5), ND, ND3; uint32_t row,col,Kpi,index; uint32_t index3,k; @@ -141,7 +134,6 @@ uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w) printf("RCC = %d, Kpi=%d, ND=%d\n",RCC,Kpi,ND); #endif ND3 = ND*3; - k=0; for (col=0; col<32; col++) { @@ -180,9 +172,7 @@ uint32_t sub_block_interleaving_cc(uint32_t D, uint8_t *d,uint8_t *w) return(RCC); } -void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) -{ - +void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) { uint32_t RTC = (D>>5), ND, ND3; uint32_t row,col,Kpi,index; uint32_t index3,k,k2; @@ -199,7 +189,6 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) printf("RTC = %d, Kpi=%d, ND=%d\n",RTC,Kpi,ND); #endif ND3 = ND*3; - // copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212 k=0; k2=0; @@ -215,7 +204,6 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) index3 = bitrev_x3[col];//3*index; for (row=0; row<RTC; row++) { - d1[index3] = w[k]; d2[index3] = w[Kpi+k2]; d3[index3] = w[Kpi+1+k2]; @@ -229,12 +217,9 @@ void sub_block_deinterleaving_turbo(uint32_t D,int16_t *d,int16_t *w) // if (ND>0) // d[2] = LTE_NULL;//d[(3*D)+2]; - } -void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w) -{ - +void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w) { //WANG_Hao uint32_t RCC = (D>>5), ND, ND3; uint32_t RCC = (D>>5); ptrdiff_t ND, ND3; @@ -251,10 +236,9 @@ void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w) ND = Kpi - D; #ifdef RM_DEBUG2 printf("sub_block_interleaving_cc : D = %d (%d), d %p, w %p\n",D,D*3,d,w); - printf("RCC = %d, Kpi=%d, ND=%d\n",RCC,Kpi,ND); + printf("RCC = %d, Kpi=%d, ND=%ld\n",RCC,Kpi,ND); #endif ND3 = ND*3; - k=0; for (col=0; col<32; col++) { @@ -265,24 +249,20 @@ void sub_block_deinterleaving_cc(uint32_t D,int8_t *d,int8_t *w) index3 = 3*index; for (row=0; row<RCC; row++) { - 
d[index3-ND3] = w[k]; d[index3-ND3+1] = w[Kpi+k]; d[index3-ND3+2] = w[(Kpi<<1)+k]; #ifdef RM_DEBUG2 - printf("row %d, index %d k %d index3-ND3 %d w(%d,%d,%d)\n",row,index,k,index3-ND3,w[k],w[Kpi+k],w[(Kpi<<1)+k]); + printf("row %d, index %d k %d index3-ND3 %ld w(%d,%d,%d)\n",row,index,k,index3-ND3,w[k],w[Kpi+k],w[(Kpi<<1)+k]); #endif index3+=96; index+=32; k++; } } - } -uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F) -{ - +uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F) { uint32_t RTC = (D>>5), ND; uint32_t col,Kpi,index; int32_t k,k2; @@ -301,8 +281,6 @@ uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F) printf("dummy sub_block_interleaving_turbo : D = %d (%d)\n",D,D*3); printf("RTC = %d, Kpi=%d, ND=%d, F=%d (Nulled %d)\n",RTC,Kpi,ND,F,(2*F + 3*ND)); #endif - - k=0; k2=0; wKpi = &w[Kpi]; @@ -371,9 +349,7 @@ uint32_t generate_dummy_w(uint32_t D, uint8_t *w,uint8_t F) return(RTC); } -uint32_t generate_dummy_w_cc(uint32_t D, uint8_t *w) -{ - +uint32_t generate_dummy_w_cc(uint32_t D, uint8_t *w) { uint32_t RCC = (D>>5), ND; uint32_t col,Kpi,index; int32_t k; @@ -392,7 +368,6 @@ uint32_t generate_dummy_w_cc(uint32_t D, uint8_t *w) printf("RCC = %d, Kpi=%d, ND=%d, (Nulled %d)\n",RCC,Kpi,ND,3*ND); #endif // ND3 = ND*3; - // copy d02 to dD2 (for mod Kpi operation from clause (4), p.16 of 36.212 k=0; @@ -466,8 +441,6 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, uint8_t nb_rb) // uint8_t m) { - - uint32_t Nir,Ncb,Gp,GpmodC,E,Ncbmod,ind,k; // int cnt=0; uint8_t *e2; @@ -487,11 +460,11 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, if (Mdlharq>0) { // Downlink Nir = Nsoft/Kmimo/cmin(8,Mdlharq); Ncb = cmin(Nir/C,3*(RTC<<5)); - } - else { // Uplink + } else { // Uplink Nir=0; Ncb = 3*(RTC<<5); // Kw } + #ifdef RM_DEBUG_TX if (rvidx==0 && r==0) { @@ -518,7 +491,6 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, AssertFatal(Qm>0,"Qm is 0\n"); Gp = G/Nl/Qm; GpmodC = Gp%C; - #ifdef RM_DEBUG printf("lte_rate_matching_turbo: Ncb %d, Kw %d, 
Nir/C %d, rvidx %d, G %d, Qm %d, Nl%d, r %d\n",Ncb,3*(RTC<<5),Nir/C,rvidx, G, Qm,Nl,r); #endif @@ -529,16 +501,12 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, E = Nl*Qm * ((GpmodC==0?0:1) + (Gp/C)); Ncbmod = Ncb%(RTC<<3); - ind = RTC * (2+(rvidx*(((Ncbmod==0)?0:1) + (Ncb/(RTC<<3)))*2)); - #ifdef RM_DEBUG_TX printf("lte_rate_matching_turbo: E %d, k0 %d, Ncbmod %d, Ncb/(RTC<<3) %d\n",E,ind,Ncbmod,Ncb/(RTC<<3)); #endif - //e2=e+(r*E); e2 = e; - k=0; for (; (ind<Ncb)&&(k<E); ind++) { @@ -633,25 +601,16 @@ uint32_t lte_rate_matching_turbo(uint32_t RTC, uint32_t lte_rate_matching_cc(uint32_t RCC, uint16_t E, uint8_t *w, - uint8_t *e) -{ - - + uint8_t *e) { uint32_t ind=0,k; - uint16_t Kw = 3*(RCC<<5); - #ifdef RM_DEBUG_CC uint32_t nulled=0; - printf("lte_rate_matching_cc: Kw %d, E %d\n",Kw, E); #endif for (k=0; k<E; k++) { - - while(w[ind] == LTE_NULL) { - #ifdef RM_DEBUG_CC nulled++; printf("RM_TX_CC : ind %d, NULL\n",ind); @@ -662,7 +621,6 @@ uint32_t lte_rate_matching_cc(uint32_t RCC, ind=0; } - e[k] = w[ind]; #ifdef RM_DEBUG_CC // printf("k %d ind %d, w %c(%d)\n",k,ind,w[ind],w[ind]); @@ -695,10 +653,7 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, uint8_t Qm, uint8_t Nl, uint8_t r, - uint32_t *E_out) -{ - - + uint32_t *E_out) { uint32_t Nir,Ncb,Gp,GpmodC,E,Ncbmod,ind,k; int16_t *soft_input2; // int32_t w_tmp; @@ -708,15 +663,14 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, if (Kmimo==0 || C==0 || Qm==0 || Nl==0) { printf("lte_rate_matching.c: invalid parameters (Kmimo %d, Mdlharq %d, C %d, Qm %d, Nl %d\n", - Kmimo,Mdlharq,C,Qm,Nl); + Kmimo,Mdlharq,C,Qm,Nl); return(-1); } if (Mdlharq>0) { // Downlink Nir = Nsoft/Kmimo/cmin(8,Mdlharq); Ncb = cmin(Nir/C,3*(RTC<<5)); - } - else { // Uplink + } else { // Uplink Nir=0; Ncb = 3*(RTC<<5); } @@ -726,17 +680,13 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, Gp = G/Nl/Qm; GpmodC = Gp%C; - - if (r < (C-(GpmodC))) E = Nl*Qm * (Gp/C); else E = Nl*Qm * ((GpmodC==0?0:1) + (Gp/C)); Ncbmod = Ncb%(RTC<<3); - ind = RTC * 
(2+(rvidx*(((Ncbmod==0)?0:1) + (Ncb/(RTC<<3)))*2)); - #ifdef RM_DEBUG printf("lte_rate_matching_turbo_rx: Clear %d, E %d, Ncb %d, Kw %d, rvidx %d, G %d, Qm %d, Nl%d, r %d\n",clear,E,Ncb,3*(RTC<<5),rvidx, G, Qm,Nl,r); #endif @@ -751,8 +701,8 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, if (dummy_w[ind] != LTE_NULL) { /* if ((w[ind]>0 && soft_input2[k]<0) || - (w[ind]<0 && soft_input2[k]>0)) - printf("ind %d: w %d => soft_in %d\n",ind,w[ind],soft_input2[k]);*/ + (w[ind]<0 && soft_input2[k]>0)) + printf("ind %d: w %d => soft_in %d\n",ind,w[ind],soft_input2[k]);*/ w[ind] += soft_input2[k++]; #ifdef RM_DEBUG printf("RM_RX k%d Ind: %d (%d)\n",k-1,ind,w[ind]); @@ -831,10 +781,8 @@ int lte_rate_matching_turbo_rx(uint32_t RTC, ind=0; } */ - *E_out = E; return(0); - } @@ -842,28 +790,19 @@ void lte_rate_matching_cc_rx(uint32_t RCC, uint16_t E, int8_t *w, uint8_t *dummy_w, - int8_t *soft_input) -{ - - - + int8_t *soft_input) { uint32_t ind=0,k; uint16_t Kw = 3*(RCC<<5); uint32_t acc=1; int16_t w16[Kw]; #ifdef RM_DEBUG_CC uint32_t nulled=0; - printf("lte_rate_matching_cc_rx: Kw %d, E %d, w %p, soft_input %p\n",3*(RCC<<5),E,w,soft_input); #endif - - memset(w,0,Kw); memset(w16,0,Kw*sizeof(int16_t)); for (k=0; k<E; k++) { - - while(dummy_w[ind] == LTE_NULL) { #ifdef RM_DEBUG_CC nulled++; @@ -883,10 +822,7 @@ void lte_rate_matching_cc_rx(uint32_t RCC, #ifdef RM_DEBUG_CC printf("RM_RX_CC k %d (%d) ind: %d (%d)\n",k,soft_input[k],ind,w16[ind]); #endif - - w16[ind] += soft_input[k]; - ind++; if (ind==Kw) { @@ -907,7 +843,6 @@ void lte_rate_matching_cc_rx(uint32_t RCC, } #ifdef RM_DEBUG_CC - printf("Nulled %d\n",nulled); #endif } @@ -915,8 +850,7 @@ void lte_rate_matching_cc_rx(uint32_t RCC, #ifdef MAIN -void main() -{ +void main() { uint8_t d[96+3+(3*6144)]; uint8_t w[3*6144],e[12*6144]; uint32_t RTC,G,rvidx; @@ -924,7 +858,6 @@ void main() uint32_t mod_order = 4; uint32_t first_dlsch_symbol = 2; uint32_t i; - G = ( nb_rb * (12 * mod_order) * (12-first_dlsch_symbol-3)) ;//( nb_rb 
* (12 * mod_order) * (14-first_dlsch_symbol-3)) : // initialize 96 first positions to "LTE_NULL" diff --git a/openair1/PHY/CODING/lte_segmentation.c b/openair1/PHY/CODING/lte_segmentation.c index ba79b0171e4..4dc531c58f9 100644 --- a/openair1/PHY/CODING/lte_segmentation.c +++ b/openair1/PHY/CODING/lte_segmentation.c @@ -38,9 +38,7 @@ int lte_segmentation(unsigned char *input_buffer, unsigned int *Cminus, unsigned int *Kplus, unsigned int *Kminus, - unsigned int *F) -{ - + unsigned int *F) { unsigned int L,Bprime,Bprime_by_C,r,Kr,k,s,crc; if (B<=6144) { @@ -56,19 +54,19 @@ int lte_segmentation(unsigned char *input_buffer, Bprime = B+((*C)*L); #ifdef DEBUG_SEGMENTATION - printf("Bprime %d\n",Bprime); + printf("Bprime %u\n",Bprime); #endif } if ((*C)>MAX_NUM_DLSCH_SEGMENTS) { - LOG_E(PHY,"lte_segmentation.c: too many segments %d, B %d, L %d, Bprime %d\n",*C,B,L,Bprime); + LOG_E(PHY,"lte_segmentation.c: too many segments %d, B %d, L %d, Bprime %d\n",*C,B,L,Bprime); return(-1); } // Find K+ Bprime_by_C = Bprime/(*C); #ifdef DEBUG_SEGMENTATION - printf("Bprime_by_C %d\n",Bprime_by_C); + printf("Bprime_by_C %u\n",Bprime_by_C); #endif // Bprime = Bprime_by_C>>3; @@ -93,17 +91,16 @@ int lte_segmentation(unsigned char *input_buffer, *Kminus = (*Kplus - 32); } else if (Bprime_by_C <=6144 ) { // increase by 8 bytes til here - *Kplus = (Bprime_by_C>>6)<<6; #ifdef DEBUG_SEGMENTATION - printf("Bprime_by_C_by_C %d , Kplus %d\n",Bprime_by_C,*Kplus); + printf("Bprime_by_C_by_C %u , Kplus %u\n",Bprime_by_C,*Kplus); #endif if (*Kplus < Bprime_by_C) *Kplus = *Kplus + 64; #ifdef DEBUG_SEGMENTATION - printf("Bprime_by_C_by_C %d , Kplus2 %d\n",Bprime_by_C,*Kplus); + printf("Bprime_by_C_by_C %u , Kplus2 %u\n",Bprime_by_C,*Kplus); #endif *Kminus = (*Kplus - 64); } else { @@ -116,25 +113,21 @@ int lte_segmentation(unsigned char *input_buffer, *Kminus = 0; *Cminus = 0; } else { - // printf("More than one segment (%d), exiting \n",*C); // exit(-1); *Cminus = ((*C)*(*Kplus) - 
(Bprime))/((*Kplus) - (*Kminus)); *Cplus = (*C) - (*Cminus); } - AssertFatal(Bprime <= (*Cplus)*(*Kplus) + (*Cminus)*(*Kminus), - "Bprime %d < (*Cplus %d)*(*Kplus %d) + (*Cminus %d)*(*Kminus %d)\n", - Bprime,*Cplus,*Kplus,*Cminus,*Kminus); - + "Bprime %d < (*Cplus %d)*(*Kplus %d) + (*Cminus %d)*(*Kminus %d)\n", + Bprime,*Cplus,*Kplus,*Cminus,*Kminus); *F = ((*Cplus)*(*Kplus) + (*Cminus)*(*Kminus) - (Bprime)); #ifdef DEBUG_SEGMENTATION - printf("C %d, Cplus %d, Cminus %d, Kplus %d, Kminus %d, Bprime_bytes %d, Bprime %d, F %d\n",*C,*Cplus,*Cminus,*Kplus,*Kminus,Bprime>>3,Bprime,*F); + printf("C %u, Cplus %u, Cminus %u, Kplus %u, Kminus %u, Bprime_bytes %u, Bprime %u, F %u\n",*C,*Cplus,*Cminus,*Kplus,*Kminus,Bprime>>3,Bprime,*F); #endif if ((input_buffer) && (output_buffers)) { - for (k=0; k<*F>>3; k++) { output_buffers[0][k] = 0; } @@ -142,7 +135,6 @@ int lte_segmentation(unsigned char *input_buffer, s=0; for (r=0; r<*C; r++) { - if (r<*Cminus) Kr = *Kminus; else @@ -150,18 +142,18 @@ int lte_segmentation(unsigned char *input_buffer, while (k<((Kr - L)>>3)) { output_buffers[r][k] = input_buffer[s]; - // printf("encoding segment %d : byte %d (%d) => %d\n",r,k,Kr>>3,input_buffer[s]); + // printf("encoding segment %d : byte %d (%d) => %d\n",r,k,Kr>>3,input_buffer[s]); k++; s++; } if (*C > 1) { // add CRC crc = crc24b(output_buffers[r],Kr-24)>>8; - output_buffers[r][(Kr-24)>>3] = ((uint8_t*)&crc)[2]; - output_buffers[r][1+((Kr-24)>>3)] = ((uint8_t*)&crc)[1]; - output_buffers[r][2+((Kr-24)>>3)] = ((uint8_t*)&crc)[0]; + output_buffers[r][(Kr-24)>>3] = ((uint8_t *)&crc)[2]; + output_buffers[r][1+((Kr-24)>>3)] = ((uint8_t *)&crc)[1]; + output_buffers[r][2+((Kr-24)>>3)] = ((uint8_t *)&crc)[0]; #ifdef DEBUG_SEGMENTATION - printf("Segment %d : CRC %x\n",r,crc); + printf("Segment %u : CRC %x\n",r,crc); #endif } @@ -175,9 +167,7 @@ int lte_segmentation(unsigned char *input_buffer, #ifdef MAIN -main() -{ - +main() { unsigned int Kplus,Kminus,C,Cplus,Cminus,F,Bbytes; for (Bbytes=5; 
Bbytes<2*768; Bbytes++) { diff --git a/openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c b/openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c index f7c4a3ead31..e50af7cdd8f 100644 --- a/openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c +++ b/openair1/PHY/LTE_ESTIMATION/lte_dl_channel_estimation.c @@ -34,8 +34,7 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, unsigned char Ns, unsigned char p, unsigned char l, - unsigned char symbol) -{ + unsigned char symbol) { int pilot[2][200] __attribute__((aligned(16))); unsigned char nu,aarx; unsigned short k; @@ -45,16 +44,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, // unsigned int n; // int i; static int interpolateS11S12 = 1; - uint16_t Nid_cell = (eNB_offset == 0) ? ue->frame_parms.Nid_cell : ue->measurements.adj_cell_id[eNB_offset-1]; - uint8_t nushift,pilot0,pilot1,pilot2,pilot3; uint8_t previous_thread_id = ue->current_thread_id[Ns>>1]==0 ? (RX_NB_TH-1):(ue->current_thread_id[Ns>>1]-1); int **dl_ch_estimates =ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset]; int **dl_ch_estimates_previous=ue->common_vars.common_vars_rx_data_per_thread[previous_thread_id].dl_ch_estimates[eNB_offset]; int **rxdataF=ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].rxdataF; - pilot0 = 0; + if (ue->frame_parms.Ncp == 0) { // normal prefix pilot1 = 4; pilot2 = 7; @@ -81,7 +78,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, return(-1); } - //ch_offset = (l*(ue->frame_parms.ofdm_symbol_size)); if (ue->high_speed_flag == 0) // use second channel estimate position for temporary storage ch_offset = ue->frame_parms.ofdm_symbol_size ; @@ -89,93 +85,88 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, ch_offset = ue->frame_parms.ofdm_symbol_size*symbol; symbol_offset = ue->frame_parms.ofdm_symbol_size*symbol; - k = (nu + nushift)%6; - #ifdef DEBUG_CH - printf("Channel Estimation : ThreadId %d, eNB_offset %d cell_id %d ch_offset %d, OFDM 
size %d, Ncp=%d, l=%d, Ns=%d, k=%d\n",ue->current_thread_id[Ns>>1], eNB_offset,Nid_cell,ch_offset,ue->frame_parms.ofdm_symbol_size, + printf("Channel Estimation : ThreadId %d, eNB_offset %d cell_id %d ch_offset %d, OFDM size %d, Ncp=%d, l=%d, Ns=%d, k=%d\n",ue->current_thread_id[Ns>>1], eNB_offset,Nid_cell,ch_offset, + ue->frame_parms.ofdm_symbol_size, ue->frame_parms.Ncp,l,Ns,k); #endif switch (k) { - case 0 : - f=filt24_0; //for first pilot of RB, first half - f2=filt24_2; //for second pilot of RB, first half - fl=filt24_0; //for first pilot of leftmost RB - f2l2=filt24_2; - // fr=filt24_2r; //for first pilot of rightmost RB - fr=filt24_0r2; //for first pilot of rightmost RB - // f2r2=filt24_0r2; - f2r2=filt24_2r; - - f_dc=filt24_0_dcr; - f2_dc=filt24_2_dcl; - - break; - - case 1 : - f=filt24_1; - f2=filt24_3; - fl=filt24_1l; - f2l2=filt24_3l2; - fr=filt24_1r2; - f2r2=filt24_3r; - f_dc=filt24_1_dcr; //for first pilot of RB, first half - f2_dc=filt24_3_dcl; //for first pilot of RB, first half - break; - - case 2 : - f=filt24_2; - f2=filt24_4; - fl=filt24_2l; - f2l2=filt24_4l2; - fr=filt24_2r2; - f2r2=filt24_4r; - f_dc=filt24_2_dcr; //for first pilot of RB, first half - f2_dc=filt24_4_dcl; //for first pilot of RB, first half - break; - - case 3 : - f=filt24_3; - f2=filt24_5; - fl=filt24_3l; - f2l2=filt24_5l2; - fr=filt24_3r2; - f2r2=filt24_5r; - f_dc=filt24_3_dcr; //for first pilot of RB, first half - f2_dc=filt24_5_dcl; //for first pilot of RB, first half - break; - - case 4 : - f=filt24_4; - f2=filt24_6; - fl=filt24_4l; - f2l2=filt24_6l2; - fr=filt24_4r2; - f2r2=filt24_6r; - f_dc=filt24_4_dcr; //for first pilot of RB, first half - f2_dc=filt24_6_dcl; //for first pilot of RB, first half - break; - - case 5 : - f=filt24_5; - f2=filt24_7; - fl=filt24_5l; - f2l2=filt24_7l2; - fr=filt24_5r2; - f2r2=filt24_7r; - f_dc=filt24_5_dcr; //for first pilot of RB, first half - f2_dc=filt24_7_dcl; //for first pilot of RB, first half - break; - - default: - 
LOG_E(PHY,"lte_dl_channel_estimation: k=%d -> ERROR\n",k); - return(-1); - break; + case 0 : + f=filt24_0; //for first pilot of RB, first half + f2=filt24_2; //for second pilot of RB, first half + fl=filt24_0; //for first pilot of leftmost RB + f2l2=filt24_2; + // fr=filt24_2r; //for first pilot of rightmost RB + fr=filt24_0r2; //for first pilot of rightmost RB + // f2r2=filt24_0r2; + f2r2=filt24_2r; + f_dc=filt24_0_dcr; + f2_dc=filt24_2_dcl; + break; + + case 1 : + f=filt24_1; + f2=filt24_3; + fl=filt24_1l; + f2l2=filt24_3l2; + fr=filt24_1r2; + f2r2=filt24_3r; + f_dc=filt24_1_dcr; //for first pilot of RB, first half + f2_dc=filt24_3_dcl; //for first pilot of RB, first half + break; + + case 2 : + f=filt24_2; + f2=filt24_4; + fl=filt24_2l; + f2l2=filt24_4l2; + fr=filt24_2r2; + f2r2=filt24_4r; + f_dc=filt24_2_dcr; //for first pilot of RB, first half + f2_dc=filt24_4_dcl; //for first pilot of RB, first half + break; + + case 3 : + f=filt24_3; + f2=filt24_5; + fl=filt24_3l; + f2l2=filt24_5l2; + fr=filt24_3r2; + f2r2=filt24_5r; + f_dc=filt24_3_dcr; //for first pilot of RB, first half + f2_dc=filt24_5_dcl; //for first pilot of RB, first half + break; + + case 4 : + f=filt24_4; + f2=filt24_6; + fl=filt24_4l; + f2l2=filt24_6l2; + fr=filt24_4r2; + f2r2=filt24_6r; + f_dc=filt24_4_dcr; //for first pilot of RB, first half + f2_dc=filt24_6_dcl; //for first pilot of RB, first half + break; + + case 5 : + f=filt24_5; + f2=filt24_7; + fl=filt24_5l; + f2l2=filt24_7l2; + fr=filt24_5r2; + f2r2=filt24_7r; + f_dc=filt24_5_dcr; //for first pilot of RB, first half + f2_dc=filt24_7_dcl; //for first pilot of RB, first half + break; + + default: + LOG_E(PHY,"lte_dl_channel_estimation: k=%d -> ERROR\n",k); + return(-1); + break; } - - // generate pilot lte_dl_cell_spec_rx(ue, eNB_offset, @@ -184,27 +175,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, (l==0)?0:1, p); - for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) { - pil = (int16_t *)&pilot[p][0]; rxF = (int16_t 
*)&rxdataF[aarx][((symbol_offset+k+ue->frame_parms.first_carrier_offset))]; dl_ch = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]; - - // if (eNb_id==0) memset(dl_ch,0,4*(ue->frame_parms.ofdm_symbol_size)); + if (ue->high_speed_flag==0) // multiply previous channel estimate by ch_est_alpha multadd_complex_vector_real_scalar(dl_ch-(ue->frame_parms.ofdm_symbol_size<<1), ue->ch_est_alpha,dl_ch-(ue->frame_parms.ofdm_symbol_size<<1), 1,ue->frame_parms.ofdm_symbol_size); + #ifdef DEBUG_CH printf("k %d, first_carrier %d\n",k,ue->frame_parms.first_carrier_offset); #endif + if ((ue->frame_parms.N_RB_DL==6) || (ue->frame_parms.N_RB_DL==50) || (ue->frame_parms.N_RB_DL==100)) { - //First half of pilots // Treat first 2 pilots specially (left edge) ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); @@ -219,7 +208,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; // Re Im rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH @@ -234,28 +222,22 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, dl_ch+=16; for (pilot_cnt=2; pilot_cnt<((ue->frame_parms.N_RB_DL)-1); pilot_cnt+=2) { - - - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); //Re ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //Im #ifdef DEBUG_CH - printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); + printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #endif multadd_real_vector_complex_scalar(f, ch, dl_ch, 24); - - pil+=2; // Re Im rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH - printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil 
-> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); + printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #endif multadd_real_vector_complex_scalar(f2, ch, @@ -264,29 +246,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=16; - } // printf("Second half\n"); // Second half of RBs - k = (nu + nushift)%6; if (k > 6) k -=6; rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))]; - #ifdef DEBUG_CH printf("second half k %d\n",k); #endif - for (pilot_cnt=0; pilot_cnt<((ue->frame_parms.N_RB_DL)-3); pilot_cnt+=2) { - + for (pilot_cnt=0; pilot_cnt<((ue->frame_parms.N_RB_DL)-3); pilot_cnt+=2) { ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH - printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); + printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #endif multadd_real_vector_complex_scalar(f, ch, @@ -295,11 +273,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH - printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); + printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #endif multadd_real_vector_complex_scalar(f2, ch, @@ -308,13 +285,12 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=16; - } ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH - 
printf("pilot %d: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); + printf("pilot %u: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #endif multadd_real_vector_complex_scalar(fr, ch, @@ -323,34 +299,25 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; // Re Im rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH - printf("pilot %d: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); + printf("pilot %u: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); #endif multadd_real_vector_complex_scalar(f2r2, ch, dl_ch, 24); - - - } - - else if (ue->frame_parms.N_RB_DL==25) { + } else if (ue->frame_parms.N_RB_DL==25) { //printf("Channel estimation\n"); - // Treat first 2 pilots specially (left edge) ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - #ifdef DEBUG_CH printf("pilot 0 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - // ch[0] = 1024; // ch[1] = -128; #endif - multadd_real_vector_complex_scalar(fl, ch, dl_ch, @@ -358,17 +325,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; // Re Im rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - #ifdef DEBUG_CH printf("pilot 1 : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - // ch[0] = 1024; // ch[1] = -128; #endif - multadd_real_vector_complex_scalar(f2l2, ch, dl_ch, @@ -378,21 +341,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, dl_ch+=16; for (pilot_cnt=2; 
pilot_cnt<24; pilot_cnt+=2) { - // printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]); // printf("rx[%d][%d] -> (%d,%d)\n",p,ue->frame_parms.first_carrier_offset + ue->frame_parms.nushift + 6*rb+(3*p),rxF[0],rxF[1]); - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - #ifdef DEBUG_CH - printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - + printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); // ch[0] = 1024; // ch[1] = -128; #endif - - multadd_real_vector_complex_scalar(f, ch, dl_ch, @@ -400,13 +357,10 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; // Re Im rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - #ifdef DEBUG_CH - printf("pilot %d : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - + printf("pilot %u : rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",pilot_cnt+1,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); // ch[0] = 1024; // ch[1] = -128; #endif @@ -417,39 +371,31 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=16; - } ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH printf("pilot 24: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - // ch[0] = 1024; // ch[1] = -128; #endif - - multadd_real_vector_complex_scalar(f_dc, ch, dl_ch, 24); pil+=2; // Re Im dl_ch+=8; - // printf("Second half\n"); // Second half of RBs rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+k))]; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = 
(int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH printf("pilot 25: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - // ch[0] = 1024; // ch[1] = -128; #endif - multadd_real_vector_complex_scalar(f2_dc, ch, dl_ch, @@ -459,19 +405,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, dl_ch+=16; for (pilot_cnt=0; pilot_cnt<22; pilot_cnt+=2) { - // printf("* pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]); // printf("rx[%d][%d] -> (%d,%d)\n",p,ue->frame_parms.first_carrier_offset + ue->frame_parms.nushift + 6*rb+(3*p),rxF[0],rxF[1]); - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH - printf("pilot %d rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",26+pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - + printf("pilot %u rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",26+pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); // ch[0] = 1024; // ch[1] = -128; #endif - multadd_real_vector_complex_scalar(f, ch, dl_ch, @@ -479,16 +421,13 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); #ifdef DEBUG_CH - printf("pilot %d : rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",27+pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - + printf("pilot %u : rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",27+pilot_cnt,rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); // ch[0] = 1024; // ch[1] = -128; #endif - multadd_real_vector_complex_scalar(f2, ch, dl_ch, @@ -496,20 +435,15 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=16; - } ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - #ifdef DEBUG_CH 
printf("pilot 49: rxF -> (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - // ch[0] = 1024; // ch[1] = -128; #endif - - multadd_real_vector_complex_scalar(fr, ch, dl_ch, @@ -517,28 +451,20 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; // Re Im rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - #ifdef DEBUG_CH - printf("pilot 50: rxF - > (%d,%d) ch -> (%d,%d), pil -> (%d,%d) \n",rxF[0],rxF[1],ch[0],ch[1],pil[0],pil[1]); - // ch[0] = 1024; // ch[1] = -128; #endif - multadd_real_vector_complex_scalar(f2r2, ch, dl_ch, 24); - } else if (ue->frame_parms.N_RB_DL==15) { - //printf("First Half\n"); for (rb=0; rb<28; rb+=4) { - //printf("aarx=%d\n",aarx); //printf("pilot[%d][%d] (%d,%d)\n",p,rb,pil[0],pil[1]); //printf("rx[%d][%d] -> (%d,%d)\n",p, @@ -555,7 +481,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; // Re Im rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); //printf("ch -> (%d,%d)\n",ch[0],ch[1]); @@ -566,7 +491,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=16; - } ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); @@ -578,13 +502,11 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, 24); pil+=2; // Re Im dl_ch+=8; - //printf("Second half\n"); //Second half of RBs rxF = (int16_t *)&rxdataF[aarx][((symbol_offset+1+nushift + (3*p)))]; ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - multadd_real_vector_complex_scalar(f2, ch, dl_ch, @@ -602,7 +524,6 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, // rxF[1]); ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] 
+ (int32_t)pil[1]*rxF[0])>>15); - multadd_real_vector_complex_scalar(f, ch, dl_ch, @@ -610,10 +531,8 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=8; - ch[0] = (int16_t)(((int32_t)pil[0]*rxF[0] - (int32_t)pil[1]*rxF[1])>>15); ch[1] = (int16_t)(((int32_t)pil[0]*rxF[1] + (int32_t)pil[1]*rxF[0])>>15); - multadd_real_vector_complex_scalar(f2, ch, dl_ch, @@ -621,17 +540,14 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, pil+=2; rxF+=12; dl_ch+=16; - } } else { LOG_E(PHY,"channel estimation not implemented for ue->frame_parms.N_RB_DL = %d\n",ue->frame_parms.N_RB_DL); } - if (ue->perfect_ce == 0) { // Temporal Interpolation // printf("ch_offset %d\n",ch_offset); - dl_ch = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][ch_offset]; if (ue->high_speed_flag == 0) { @@ -639,182 +555,155 @@ int lte_dl_channel_estimation(PHY_VARS_UE *ue, 32767-ue->ch_est_alpha, dl_ch-(ue->frame_parms.ofdm_symbol_size<<1),0,ue->frame_parms.ofdm_symbol_size); } else { // high_speed_flag == 1 - if ((symbol == 0)) { - // printf("Interpolating %d->0\n",4-ue->frame_parms.Ncp); - // dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][(4-ue->frame_parms.Ncp)*(ue->frame_parms.ofdm_symbol_size)]; - if(((Ns>>1)!=0) || ( ((Ns>>1)==0) && interpolateS11S12)) - { - //LOG_I(PHY,"Interpolate s11-->s0 to get s12 and s13 Ns %d \n", Ns); - dl_ch_prev = (int16_t *)&dl_ch_estimates_previous[(p<<1)+aarx][pilot3*(ue->frame_parms.ofdm_symbol_size)]; - - multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); - 
multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); - } - - interpolateS11S12 = 1; - } // this is 1/3,2/3 combination for pilots spaced by 3 symbols - else if (symbol == pilot1) { - dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][0]; - - //LOG_I(PHY,"Interpolate s0-->s4 to get s1 s2 and s3 Ns %d \n", Ns); - if (ue->frame_parms.Ncp==0) {// pilot spacing 4 symbols (1/4,1/2,3/4 combination) - - uint8_t previous_subframe; - if(Ns>>1 == 0) - previous_subframe = 9; - else - previous_subframe = ((Ns>>1) - 1 )%9; - - if((subframe_select(&ue->frame_parms,previous_subframe) == SF_UL)) - { - - multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - } - else - { - multadd_complex_vector_real_scalar(dl_ch_prev,24576,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,8192,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); - 
multadd_complex_vector_real_scalar(dl_ch,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,24576,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - - } - } else { - multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)<<1),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); - } // pilot spacing 3 symbols (1/3,2/3 combination) - } else if (symbol == pilot2) { - dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot1*(ue->frame_parms.ofdm_symbol_size)]; - - multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); - } else { // symbol == pilot3 - // printf("Interpolating 0->%d\n",4-ue->frame_parms.Ncp); - dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot2*(ue->frame_parms.ofdm_symbol_size)]; - - if (ue->frame_parms.Ncp==0) {// pilot spacing 4 
symbols (1/4,1/2,3/4 combination) - multadd_complex_vector_real_scalar(dl_ch_prev,24576,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,8192,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,24576,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - } else { - multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); - - multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)<<1),1,ue->frame_parms.ofdm_symbol_size); - multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); - } // pilot spacing 3 symbols (1/3,2/3 combination) - - if((ue->rx_offset_diff !=0) && ((Ns>>1) == 9)) - { - //LOG_I(PHY,"Extrapolate s7-->s11 to get s12 and s13 Ns %d\n", Ns); - interpolateS11S12 = 0; - //LOG_E(PHY,"Interpolate s7--s11 s12 s13 pilot 3 Ns %d l %d symbol %d \n", Ns, l, symbol); - int16_t *dlChEst_ofdm11 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot3*(ue->frame_parms.ofdm_symbol_size)]; - int16_t *dlChEst_ofdm7 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot2*(ue->frame_parms.ofdm_symbol_size)]; - - // interpolate 
ofdm s12: 5/4*ofdms11 + -1/4*ofdms7 5/4 q1.15 40960 -1/4 q1.15 8192 - int16_t *dlChEst_ofdm12 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][12*ue->frame_parms.ofdm_symbol_size]; - for(int i=0; i<(2*ue->frame_parms.ofdm_symbol_size); i++) - { - int64_t tmp_mult = 0; - tmp_mult = ((int64_t)dlChEst_ofdm11[i] * 40960 - (int64_t)dlChEst_ofdm7[i] * 8192); - - tmp_mult = tmp_mult >> 15; - dlChEst_ofdm12[i] = tmp_mult; - } - - // interpolate ofdm s13: 3/2*ofdms11 + -1/2*ofdms7 3/2 q1.15 49152 1/2 q1.15 16384 - int16_t *dlChEst_ofdm13 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][13*ue->frame_parms.ofdm_symbol_size]; - for(int i=0; i<(2*ue->frame_parms.ofdm_symbol_size); i++) - { - int64_t tmp_mult = 0; - tmp_mult = ((int64_t)dlChEst_ofdm11[i] * 49152 - (int64_t)dlChEst_ofdm7[i] * 16384); - - tmp_mult = tmp_mult >> 15; - dlChEst_ofdm13[i] = tmp_mult; - } - } + if ((symbol == 0)) { + // printf("Interpolating %d->0\n",4-ue->frame_parms.Ncp); + // dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][(4-ue->frame_parms.Ncp)*(ue->frame_parms.ofdm_symbol_size)]; + if(((Ns>>1)!=0) || ( ((Ns>>1)==0) && interpolateS11S12)) { + //LOG_I(PHY,"Interpolate s11-->s0 to get s12 and s13 Ns %d \n", Ns); + dl_ch_prev = (int16_t *)&dl_ch_estimates_previous[(p<<1)+aarx][pilot3*(ue->frame_parms.ofdm_symbol_size)]; + multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); + } + interpolateS11S12 = 1; + } // this is 1/3,2/3 combination for pilots spaced by 3 symbols + else if (symbol == pilot1) { + 
dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][0]; + + //LOG_I(PHY,"Interpolate s0-->s4 to get s1 s2 and s3 Ns %d \n", Ns); + if (ue->frame_parms.Ncp==0) {// pilot spacing 4 symbols (1/4,1/2,3/4 combination) + uint8_t previous_subframe; + + if(Ns>>1 == 0) + previous_subframe = 9; + else + previous_subframe = ((Ns>>1) - 1 )%9; + + if((subframe_select(&ue->frame_parms,previous_subframe) == SF_UL)) { + multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,32440,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + } else { + multadd_complex_vector_real_scalar(dl_ch_prev,24576,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,8192,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + 
multadd_complex_vector_real_scalar(dl_ch,24576,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + } + } else { + multadd_complex_vector_real_scalar(dl_ch_prev,328,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)<<1),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); + } // pilot spacing 3 symbols (1/3,2/3 combination) + } else if (symbol == pilot2) { + dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot1*(ue->frame_parms.ofdm_symbol_size)]; + multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); + } else { // symbol == pilot3 + // printf("Interpolating 0->%d\n",4-ue->frame_parms.Ncp); + dl_ch_prev = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot2*(ue->frame_parms.ofdm_symbol_size)]; + + if (ue->frame_parms.Ncp==0) {// pilot spacing 4 symbols (1/4,1/2,3/4 combination) + multadd_complex_vector_real_scalar(dl_ch_prev,24576,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + 
multadd_complex_vector_real_scalar(dl_ch,8192,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,16384,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,8192,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,24576,dl_ch_prev+(3*2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + } else { + multadd_complex_vector_real_scalar(dl_ch_prev,10923,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)),0,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch_prev,21845,dl_ch_prev+(2*(ue->frame_parms.ofdm_symbol_size)<<1),1,ue->frame_parms.ofdm_symbol_size); + multadd_complex_vector_real_scalar(dl_ch,10923,dl_ch_prev+(2*((ue->frame_parms.ofdm_symbol_size)<<1)),0,ue->frame_parms.ofdm_symbol_size); + } // pilot spacing 3 symbols (1/3,2/3 combination) + + if((ue->rx_offset_diff !=0) && ((Ns>>1) == 9)) { + //LOG_I(PHY,"Extrapolate s7-->s11 to get s12 and s13 Ns %d\n", Ns); + interpolateS11S12 = 0; + //LOG_E(PHY,"Interpolate s7--s11 s12 s13 pilot 3 Ns %d l %d symbol %d \n", Ns, l, symbol); + int16_t *dlChEst_ofdm11 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot3*(ue->frame_parms.ofdm_symbol_size)]; + int16_t *dlChEst_ofdm7 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][pilot2*(ue->frame_parms.ofdm_symbol_size)]; + // interpolate ofdm s12: 5/4*ofdms11 + -1/4*ofdms7 5/4 q1.15 40960 -1/4 q1.15 8192 + int16_t *dlChEst_ofdm12 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][12*ue->frame_parms.ofdm_symbol_size]; + + for(int 
i=0; i<(2*ue->frame_parms.ofdm_symbol_size); i++) { + int64_t tmp_mult = 0; + tmp_mult = ((int64_t)dlChEst_ofdm11[i] * 40960 - (int64_t)dlChEst_ofdm7[i] * 8192); + tmp_mult = tmp_mult >> 15; + dlChEst_ofdm12[i] = tmp_mult; } - } + // interpolate ofdm s13: 3/2*ofdms11 + -1/2*ofdms7 3/2 q1.15 49152 1/2 q1.15 16384 + int16_t *dlChEst_ofdm13 = (int16_t *)&dl_ch_estimates[(p<<1)+aarx][13*ue->frame_parms.ofdm_symbol_size]; + + for(int i=0; i<(2*ue->frame_parms.ofdm_symbol_size); i++) { + int64_t tmp_mult = 0; + tmp_mult = ((int64_t)dlChEst_ofdm11[i] * 49152 - (int64_t)dlChEst_ofdm7[i] * 16384); + tmp_mult = tmp_mult >> 15; + dlChEst_ofdm13[i] = tmp_mult; + } + } + } + } } } void (*idft)(int16_t *,int16_t *, int); switch (ue->frame_parms.ofdm_symbol_size) { - case 128: - idft = idft128; - break; + case 128: + idft = idft128; + break; - case 256: - idft = idft256; - break; + case 256: + idft = idft256; + break; - case 512: - idft = idft512; - break; + case 512: + idft = idft512; + break; - case 1024: - idft = idft1024; - break; + case 1024: + idft = idft1024; + break; - case 1536: - idft = idft1536; - break; + case 1536: + idft = idft1536; + break; - case 2048: - idft = idft2048; - break; + case 2048: + idft = idft2048; + break; - default: - idft = idft512; - break; + default: + idft = idft512; + break; } - if( ((Ns%2) == 0) && (l == pilot0)) - { - // do ifft of channel estimate - for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) - for (p=0; p<ue->frame_parms.nb_antenna_ports_eNB; p++) { - if (ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset][(p<<1)+aarx]) - { - //LOG_I(PHY,"Channel Impulse Computation Slot %d ThreadId %d Symbol %d \n", Ns, ue->current_thread_id[Ns>>1], l); - idft((int16_t*) &ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset][(p<<1)+aarx][8], - (int16_t*) 
ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates_time[eNB_offset][(p<<1)+aarx],1); - } - } + if( ((Ns%2) == 0) && (l == pilot0)) { + // do ifft of channel estimate + for (aarx=0; aarx<ue->frame_parms.nb_antennas_rx; aarx++) + for (p=0; p<ue->frame_parms.nb_antenna_ports_eNB; p++) { + if (ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset][(p<<1)+aarx]) { + //LOG_I(PHY,"Channel Impulse Computation Slot %d ThreadId %d Symbol %d \n", Ns, ue->current_thread_id[Ns>>1], l); + idft((int16_t *) &ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates[eNB_offset][(p<<1)+aarx][8], + (int16_t *) ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates_time[eNB_offset][(p<<1)+aarx],1); + } + } } T(T_UE_PHY_DL_CHANNEL_ESTIMATE, T_INT(eNB_id), T_INT(ue->proc.proc_rxtx[ue->current_thread_id[Ns>>1]].frame_rx%1024), T_INT(ue->proc.proc_rxtx[ue->current_thread_id[Ns>>1]].subframe_rx), T_INT(0), T_BUFFER(&ue->common_vars.common_vars_rx_data_per_thread[ue->current_thread_id[Ns>>1]].dl_ch_estimates_time[eNB_offset][0][0], 512 * 4)); - return(0); } diff --git a/openair1/PHY/LTE_TRANSPORT/prach.c b/openair1/PHY/LTE_TRANSPORT/prach.c index 42d217130af..405990fb6ba 100644 --- a/openair1/PHY/LTE_TRANSPORT/prach.c +++ b/openair1/PHY/LTE_TRANSPORT/prach.c @@ -39,37 +39,33 @@ #include "prach_extern.h" #if (LTE_RRC_VERSION < MAKE_VERSION(14, 0, 0)) -#define rx_prach0 rx_prach + #define rx_prach0 rx_prach #endif void rx_prach0(PHY_VARS_eNB *eNB, - RU_t *ru, - uint16_t *max_preamble, - uint16_t *max_preamble_energy, - uint16_t *max_preamble_delay, - uint16_t Nf, - uint8_t tdd_mapindex + RU_t *ru, + uint16_t *max_preamble, + uint16_t *max_preamble_energy, + uint16_t *max_preamble_delay, + uint16_t Nf, + uint8_t tdd_mapindex #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) - ,uint8_t br_flag, - uint8_t ce_level + ,uint8_t br_flag, + 
uint8_t ce_level #endif - ) -{ - + ) { int i; - LTE_DL_FRAME_PARMS *fp; lte_frame_type_t frame_type; - uint16_t rootSequenceIndex; - uint8_t prach_ConfigIndex; - uint8_t Ncs_config; - uint8_t restricted_set; + uint16_t rootSequenceIndex; + uint8_t prach_ConfigIndex; + uint8_t Ncs_config; + uint8_t restricted_set; uint8_t n_ra_prb; int subframe; int16_t *prachF=NULL; int16_t **rxsigF=NULL; int nb_rx; - int16_t *prach2; uint8_t preamble_index; uint16_t NCS,NCS2; @@ -93,106 +89,108 @@ void rx_prach0(PHY_VARS_eNB *eNB, int16_t levdB; int fft_size,log2_ifft_size; int16_t prach_ifft_tmp[2048*2] __attribute__((aligned(32))); - int32_t *prach_ifft=(int32_t*)NULL; + int32_t *prach_ifft=(int32_t *)NULL; int32_t **prach_ifftp=(int32_t **)NULL; #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) int prach_ifft_cnt=0; #endif - - if (ru) { + if (ru) { fp = &ru->frame_parms; nb_rx = ru->nb_rx; - } - else if (eNB) { + } else if (eNB) { fp = &eNB->frame_parms; nb_rx = fp->nb_antennas_rx; - } - else AssertFatal(1==0,"rx_prach called without valid RU or eNB descriptor\n"); - - frame_type = fp->frame_type; + } else AssertFatal(1==0,"rx_prach called without valid RU or eNB descriptor\n"); + frame_type = fp->frame_type; #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) + if (br_flag == 1) { AssertFatal(fp->prach_emtc_config_common.prach_Config_enabled==1, - "emtc prach_Config is not enabled\n"); + "emtc prach_Config is not enabled\n"); AssertFatal(fp->prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[ce_level]==1, - "ce_level %d is not active\n",ce_level); + "ce_level %d is not active\n",ce_level); rootSequenceIndex = fp->prach_emtc_config_common.rootSequenceIndex; prach_ConfigIndex = fp->prach_emtc_config_common.prach_ConfigInfo.prach_ConfigIndex[ce_level]; Ncs_config = fp->prach_emtc_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig; restricted_set = fp->prach_emtc_config_common.prach_ConfigInfo.highSpeedFlag; n_ra_prb = get_prach_prb_offset(fp,prach_ConfigIndex, - 
fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level], - tdd_mapindex,Nf); + fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level], + tdd_mapindex,Nf); // update pointers to results for ce_level max_preamble += ce_level; max_preamble_energy += ce_level; max_preamble_delay += ce_level; - } - else + } else #endif - { - rootSequenceIndex = fp->prach_config_common.rootSequenceIndex; - prach_ConfigIndex = fp->prach_config_common.prach_ConfigInfo.prach_ConfigIndex; - Ncs_config = fp->prach_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig; - restricted_set = fp->prach_config_common.prach_ConfigInfo.highSpeedFlag; - n_ra_prb = get_prach_prb_offset(fp,prach_ConfigIndex, - fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset, - tdd_mapindex,Nf); - } + { + rootSequenceIndex = fp->prach_config_common.rootSequenceIndex; + prach_ConfigIndex = fp->prach_config_common.prach_ConfigInfo.prach_ConfigIndex; + Ncs_config = fp->prach_config_common.prach_ConfigInfo.zeroCorrelationZoneConfig; + restricted_set = fp->prach_config_common.prach_ConfigInfo.highSpeedFlag; + n_ra_prb = get_prach_prb_offset(fp,prach_ConfigIndex, + fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset, + tdd_mapindex,Nf); + } int16_t *prach[nb_rx]; uint8_t prach_fmt = get_prach_fmt(prach_ConfigIndex,frame_type); uint16_t N_ZC = (prach_fmt <4)?839:139; - + if (eNB) { #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) + if (br_flag == 1) { prach_ifftp = eNB->prach_vars_br.prach_ifft[ce_level]; subframe = eNB->proc.subframe_prach_br; prachF = eNB->prach_vars_br.prachF; rxsigF = eNB->prach_vars_br.rxsigF[ce_level]; - if (LOG_DEBUGFLAG(PRACH)){ - if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (eNB) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d, rootSequenceIndex %d, repetition number %d,numRepetitionsPrePreambleAttempt %d\n", - br_flag,ce_level,ru->proc.frame_prach,subframe, - 
fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level], - prach_ConfigIndex,rootSequenceIndex, - eNB->prach_vars_br.repetition_number[ce_level], - fp->prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level]); + + if (LOG_DEBUGFLAG(PRACH)) { + if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY, + "PRACH (eNB) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d, rootSequenceIndex %d, repetition number %d,numRepetitionsPrePreambleAttempt %d\n", + br_flag,ce_level,ru->proc.frame_prach,subframe, + fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level], + prach_ConfigIndex,rootSequenceIndex, + eNB->prach_vars_br.repetition_number[ce_level], + fp->prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level]); } } else #endif - { - prach_ifftp = eNB->prach_vars.prach_ifft[0]; - subframe = eNB->proc.subframe_prach; - prachF = eNB->prach_vars.prachF; - rxsigF = eNB->prach_vars.rxsigF[0]; - if (LOG_DEBUGFLAG(PRACH)){ - if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (eNB) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d , rootSequenceIndex %d\n", subframe,fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,prach_ConfigIndex,rootSequenceIndex); - } + { + prach_ifftp = eNB->prach_vars.prach_ifft[0]; + subframe = eNB->proc.subframe_prach; + prachF = eNB->prach_vars.prachF; + rxsigF = eNB->prach_vars.rxsigF[0]; + + if (LOG_DEBUGFLAG(PRACH)) { + if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (eNB) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d , rootSequenceIndex %d\n", subframe, + fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,prach_ConfigIndex,rootSequenceIndex); } - } - else { + } + } else { #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) + if (br_flag == 1) { - subframe = ru->proc.subframe_prach_br; - rxsigF = 
ru->prach_rxsigF_br[ce_level]; - if (LOG_DEBUGFLAG(PRACH)){ - if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (RU) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d\n", - br_flag,ce_level,ru->proc.frame_prach,subframe,fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level],prach_ConfigIndex); - } + subframe = ru->proc.subframe_prach_br; + rxsigF = ru->prach_rxsigF_br[ce_level]; + + if (LOG_DEBUGFLAG(PRACH)) { + if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (RU) : running rx_prach (br_flag %d, ce_level %d) for frame %d subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d\n", + br_flag,ce_level,ru->proc.frame_prach,subframe,fp->prach_emtc_config_common.prach_ConfigInfo.prach_FreqOffset[ce_level],prach_ConfigIndex); + } } else #endif - { - subframe = ru->proc.subframe_prach; - rxsigF = ru->prach_rxsigF; - if (LOG_DEBUGFLAG(PRACH)){ - if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (RU) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d\n", - subframe,fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,prach_ConfigIndex); - } - } + { + subframe = ru->proc.subframe_prach; + rxsigF = ru->prach_rxsigF; + if (LOG_DEBUGFLAG(PRACH)) { + if (((ru->proc.frame_prach)&1023) < 20) LOG_I(PHY,"PRACH (RU) : running rx_prach for subframe %d, prach_FreqOffset %d, prach_ConfigIndex %d\n", + subframe,fp->prach_config_common.prach_ConfigInfo.prach_FreqOffset,prach_ConfigIndex); + } + } } AssertFatal(ru!=NULL,"ru is null\n"); @@ -200,25 +198,29 @@ void rx_prach0(PHY_VARS_eNB *eNB, for (aa=0; aa<nb_rx; aa++) { if (ru->if_south == LOCAL_RF) { // set the time-domain signal if we have to use it in this node // DJP - indexing below in subframe zero takes us off the beginning of the array??? 
- prach[aa] = (int16_t*)&ru->common.rxdata[aa][(subframe*fp->samples_per_tti)-ru->N_TA_offset]; - - if (LOG_DUMPFLAG(PRACH)){ - int32_t en0=signal_energy((int32_t*)prach[aa],fp->samples_per_tti); - int8_t dbEn0 = dB_fixed(en0); - int8_t rach_dBm = dbEn0 - ru->rx_total_gain_dB; - char buffer[80]; - if (dbEn0>32 && prach[0]!= NULL) { - static int counter=0; - sprintf(buffer, "%s%d", "/tmp/prach_rx",counter); - LOG_M(buffer,"prach_rx",prach[0],fp->samples_per_tti,1,13); - } + prach[aa] = (int16_t *)&ru->common.rxdata[aa][(subframe*fp->samples_per_tti)-ru->N_TA_offset]; + + if (LOG_DUMPFLAG(PRACH)) { + int32_t en0=signal_energy((int32_t *)prach[aa],fp->samples_per_tti); + int8_t dbEn0 = dB_fixed(en0); + int8_t rach_dBm = dbEn0 - ru->rx_total_gain_dB; + char buffer[80]; + + if (dbEn0>32 && prach[0]!= NULL) { + static int counter=0; + sprintf(buffer, "%s%d", "/tmp/prach_rx",counter); + LOG_M(buffer,"prach_rx",prach[0],fp->samples_per_tti,1,13); + } + if (dB_fixed(en0)>32) { sprintf(buffer, "rach_dBm:%d",rach_dBm); + if (prach[0]!= NULL) LOG_M("prach_rx","prach_rx",prach[0],fp->samples_per_tti,1,1); - LOG_I(PHY,"RU %d, br_flag %d ce_level %d frame %d subframe %d per_tti:%d prach:%p (energy %d) TA:%d %s rxdata:%p index:%d\n", - ru->idx,br_flag,ce_level,ru->proc.frame_prach,subframe,fp->samples_per_tti, - prach[aa],dbEn0,ru->N_TA_offset,buffer,ru->common.rxdata[aa], - (subframe*fp->samples_per_tti)-ru->N_TA_offset); + + LOG_I(PHY,"RU %d, br_flag %d ce_level %d frame %d subframe %d per_tti:%d prach:%p (energy %d) TA:%d %s rxdata:%p index:%d\n", + ru->idx,br_flag,ce_level,ru->proc.frame_prach,subframe,fp->samples_per_tti, + prach[aa],dbEn0,ru->N_TA_offset,buffer,ru->common.rxdata[aa], + (subframe*fp->samples_per_tti)-ru->N_TA_offset); } } } @@ -227,19 +229,17 @@ void rx_prach0(PHY_VARS_eNB *eNB, // First compute physical root sequence if (restricted_set == 0) { AssertFatal(Ncs_config<=15, - "Illegal Ncs_config for unrestricted format %d\n",Ncs_config); + "Illegal Ncs_config 
for unrestricted format %d\n",Ncs_config); NCS = NCS_unrestricted[Ncs_config]; } else { AssertFatal(Ncs_config<=14, - "FATAL, Illegal Ncs_config for restricted format %d\n",Ncs_config); + "FATAL, Illegal Ncs_config for restricted format %d\n",Ncs_config); NCS = NCS_restricted[Ncs_config]; } if (eNB) start_meas(&eNB->rx_prach); - prach_root_sequence_map = (prach_fmt < 4) ? prach_root_sequence_map0_3 : prach_root_sequence_map4; - // PDP is oversampled, e.g. 1024 sample instead of 839 // Adapt the NCS (zero-correlation zones) with oversampling factor e.g. 1024/839 NCS2 = (N_ZC==839) ? ((NCS<<10)/839) : ((NCS<<8)/139); @@ -248,56 +248,56 @@ void rx_prach0(PHY_VARS_eNB *eNB, NCS2 = N_ZC; switch (prach_fmt) { - case 0: - Ncp = 3168; - break; - - case 1: - case 3: - Ncp = 21024; - break; - - case 2: - Ncp = 6240; - break; - - case 4: - Ncp = 448; - break; - - default: - Ncp = 3168; - break; + case 0: + Ncp = 3168; + break; + + case 1: + case 3: + Ncp = 21024; + break; + + case 2: + Ncp = 6240; + break; + + case 4: + Ncp = 448; + break; + + default: + Ncp = 3168; + break; } // Adjust CP length based on UL bandwidth switch (fp->N_RB_UL) { - case 6: - Ncp>>=4; - break; + case 6: + Ncp>>=4; + break; - case 15: - Ncp>>=3; - break; + case 15: + Ncp>>=3; + break; - case 25: - Ncp>>=2; - break; + case 25: + Ncp>>=2; + break; - case 50: - Ncp>>=1; - break; + case 50: + Ncp>>=1; + break; - case 75: - Ncp=(Ncp*3)>>2; - break; - - case 100: - if (fp->threequarter_fs == 1) + case 75: Ncp=(Ncp*3)>>2; - break; - } + break; + case 100: + if (fp->threequarter_fs == 1) + Ncp=(Ncp*3)>>2; + + break; + } if (((eNB!=NULL) && (ru->function != NGFI_RAU_IF4p5))|| ((eNB==NULL) && (ru->function == NGFI_RRU_IF4p5))) { // compute the DFTs of the PRACH temporal resources @@ -305,192 +305,188 @@ void rx_prach0(PHY_VARS_eNB *eNB, if (LOG_DEBUGFLAG(PRACH)) { LOG_D(PHY,"rx_prach: Doing FFT for N_RB_UL %d nb_rx:%d Ncp:%d\n",fp->N_RB_UL, nb_rx, Ncp); } + for (aa=0; aa<nb_rx; aa++) { 
AssertFatal(prach[aa]!=NULL,"prach[%d] is null\n",aa); prach2 = prach[aa] + (Ncp<<1); - + // do DFT switch (fp->N_RB_UL) { - case 6: - if (prach_fmt == 4) { - dft256(prach2,rxsigF[aa],1); - } else { - dft1536(prach2,rxsigF[aa],1); - - if (prach_fmt>1) - dft1536(prach2+3072,rxsigF[aa]+3072,1); - } - - break; - - case 15: - if (prach_fmt == 4) { - dft256(prach2,rxsigF[aa],1); - } else { - dft3072(prach2,rxsigF[aa],1); - - if (prach_fmt>1) - dft3072(prach2+6144,rxsigF[aa]+6144,1); - } - - break; - - case 25: - default: - if (prach_fmt == 4) { - dft1024(prach2,rxsigF[aa],1); - fft_size = 1024; - } else { - dft6144(prach2,rxsigF[aa],1); - - if (prach_fmt>1) - dft6144(prach2+12288,rxsigF[aa]+12288,1); - - fft_size = 6144; - } - - break; - - case 50: - if (prach_fmt == 4) { - dft2048(prach2,rxsigF[aa],1); - } else { - dft12288(prach2,rxsigF[aa],1); - - if (prach_fmt>1) - dft12288(prach2+24576,rxsigF[aa]+24576,1); - } - - break; - - case 75: - if (prach_fmt == 4) { - dft3072(prach2,rxsigF[aa],1); - } else { - dft18432(prach2,rxsigF[aa],1); - - if (prach_fmt>1) - dft18432(prach2+36864,rxsigF[aa]+36864,1); - } - - break; - - case 100: - if (fp->threequarter_fs==0) { - if (prach_fmt == 4) { - dft4096(prach2,rxsigF[aa],1); - } else { - dft24576(prach2,rxsigF[aa],1); - - if (prach_fmt>1) - dft24576(prach2+49152,rxsigF[aa]+49152,1); - } - } else { - if (prach_fmt == 4) { - dft3072(prach2,rxsigF[aa],1); - } else { - dft18432(prach2,rxsigF[aa],1); - - if (prach_fmt>1) - dft18432(prach2+36864,rxsigF[aa]+36864,1); - } - } - - break; + case 6: + if (prach_fmt == 4) { + dft256(prach2,rxsigF[aa],1); + } else { + dft1536(prach2,rxsigF[aa],1); + + if (prach_fmt>1) + dft1536(prach2+3072,rxsigF[aa]+3072,1); + } + + break; + + case 15: + if (prach_fmt == 4) { + dft256(prach2,rxsigF[aa],1); + } else { + dft3072(prach2,rxsigF[aa],1); + + if (prach_fmt>1) + dft3072(prach2+6144,rxsigF[aa]+6144,1); + } + + break; + + case 25: + default: + if (prach_fmt == 4) { + dft1024(prach2,rxsigF[aa],1); + 
fft_size = 1024; + } else { + dft6144(prach2,rxsigF[aa],1); + + if (prach_fmt>1) + dft6144(prach2+12288,rxsigF[aa]+12288,1); + + fft_size = 6144; + } + + break; + + case 50: + if (prach_fmt == 4) { + dft2048(prach2,rxsigF[aa],1); + } else { + dft12288(prach2,rxsigF[aa],1); + + if (prach_fmt>1) + dft12288(prach2+24576,rxsigF[aa]+24576,1); + } + + break; + + case 75: + if (prach_fmt == 4) { + dft3072(prach2,rxsigF[aa],1); + } else { + dft18432(prach2,rxsigF[aa],1); + + if (prach_fmt>1) + dft18432(prach2+36864,rxsigF[aa]+36864,1); + } + + break; + + case 100: + if (fp->threequarter_fs==0) { + if (prach_fmt == 4) { + dft4096(prach2,rxsigF[aa],1); + } else { + dft24576(prach2,rxsigF[aa],1); + + if (prach_fmt>1) + dft24576(prach2+49152,rxsigF[aa]+49152,1); + } + } else { + if (prach_fmt == 4) { + dft3072(prach2,rxsigF[aa],1); + } else { + dft18432(prach2,rxsigF[aa],1); + + if (prach_fmt>1) + dft18432(prach2+36864,rxsigF[aa]+36864,1); + } + } + + break; } k = (12*n_ra_prb) - 6*fp->N_RB_UL; - + if (k<0) { - k+=(fp->ofdm_symbol_size); + k+=(fp->ofdm_symbol_size); } - + k*=12; - k+=13; + k+=13; k*=2; int dftsize_x2 = fp->ofdm_symbol_size*24; //LOG_D(PHY,"Shifting prach_rxF from %d to 0\n",k); - if ((k+(839*2)) > dftsize_x2) { // PRACH signal is split around DC - memmove((void*)&rxsigF[aa][dftsize_x2-k],(void*)&rxsigF[aa][0],(k+(839*2)-dftsize_x2)*2); - memmove((void*)&rxsigF[aa][0],(void*)(&rxsigF[aa][k]),(dftsize_x2-k)*2); - } - else // PRACH signal is not split around DC - memmove((void*)&rxsigF[aa][0],(void*)(&rxsigF[aa][k]),839*4); - + if ((k+(839*2)) > dftsize_x2) { // PRACH signal is split around DC + memmove((void *)&rxsigF[aa][dftsize_x2-k],(void *)&rxsigF[aa][0],(k+(839*2)-dftsize_x2)*2); + memmove((void *)&rxsigF[aa][0],(void *)(&rxsigF[aa][k]),(dftsize_x2-k)*2); + } else // PRACH signal is not split around DC + memmove((void *)&rxsigF[aa][0],(void *)(&rxsigF[aa][k]),839*4); } - } - if ((eNB==NULL) && (ru!=NULL) && ru->function == NGFI_RRU_IF4p5) { - + if 
((eNB==NULL) && ru->function == NGFI_RRU_IF4p5) { /// **** send_IF4 of rxsigF to RAU **** /// #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) - if (br_flag == 1) send_IF4p5(ru, ru->proc.frame_prach, ru->proc.subframe_prach, IF4p5_PRACH+1+ce_level); - + if (br_flag == 1) send_IF4p5(ru, ru->proc.frame_prach, ru->proc.subframe_prach, IF4p5_PRACH+1+ce_level); else #endif send_IF4p5(ru, ru->proc.frame_prach, ru->proc.subframe_prach, IF4p5_PRACH); - + return; } else if (eNB!=NULL) { - if ( LOG_DEBUGFLAG(PRACH)) { - int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); + int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840)); + if ((en > 60)&&(br_flag==1)) LOG_I(PHY,"PRACH (br_flag %d,ce_level %d, n_ra_prb %d, k %d): Frame %d, Subframe %d => %d dB\n",br_flag,ce_level,n_ra_prb,k,eNB->proc.frame_rx,eNB->proc.subframe_rx,en); } } - - // in case of RAU and prach received rx_thread wakes up prach + // in case of RAU and prach received rx_thread wakes up prach // here onwards is for eNodeB_3GPP or NGFI_RAU_IF4p5 - preamble_offset_old = 99; - uint8_t update_TA = 4; uint8_t update_TA2 = 1; + switch (eNB->frame_parms.N_RB_DL) { - case 6: - update_TA = 16; - break; - - case 25: - update_TA = 4; - break; - - case 50: - update_TA = 2; - break; - - case 75: - update_TA = 3; - update_TA2 = 2; - case 100: - update_TA = 1; - break; + case 6: + update_TA = 16; + break; + + case 25: + update_TA = 4; + break; + + case 50: + update_TA = 2; + break; + + case 75: + update_TA = 3; + update_TA2 = 2; + break; + + case 100: + update_TA = 1; + break; } - + *max_preamble_energy=0; + for (preamble_index=0 ; preamble_index<64 ; preamble_index++) { + if (LOG_DEBUGFLAG(PRACH)) { + int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840)); - if (LOG_DEBUGFLAG(PRACH)){ - int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); if (en>60) LOG_I(PHY,"frame %d, subframe %d : Trying preamble %d (br_flag %d)\n",ru->proc.frame_prach,subframe,preamble_index,br_flag); } + if 
(restricted_set == 0) { // This is the relative offset in the root sequence table (5.7.2-4 from 36.211) for the given preamble index preamble_offset = ((NCS==0)? preamble_index : (preamble_index/(N_ZC/NCS))); - + if (preamble_offset != preamble_offset_old) { preamble_offset_old = preamble_offset; new_dft = 1; // This is the \nu corresponding to the preamble index preamble_shift = 0; - } - - else { + } else { preamble_shift -= NCS; - + if (preamble_shift < 0) preamble_shift+=N_ZC; } @@ -519,7 +515,6 @@ void rx_prach0(PHY_VARS_eNB *eNB, } u = prach_root_sequence_map[index]; - uint16_t n_group_ra = 0; if ( (du[u]<(N_ZC/3)) && (du[u]>=NCS) ) { @@ -560,177 +555,184 @@ void rx_prach0(PHY_VARS_eNB *eNB, // Compute DFT of RX signal (conjugate input, results in conjugate output) for each new rootSequenceIndex if (LOG_DEBUGFLAG(PRACH)) { - int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); + int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840)); + if (en>60) LOG_I(PHY,"frame %d, subframe %d : preamble index %d: offset %d, preamble shift %d (br_flag %d, en %d)\n", - ru->proc.frame_prach,subframe,preamble_index,preamble_offset,preamble_shift,br_flag,en); + ru->proc.frame_prach,subframe,preamble_index,preamble_offset,preamble_shift,br_flag,en); } + log2_ifft_size = 10; fft_size = 6144; if (new_dft == 1) { new_dft = 0; - #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) + if (br_flag == 1) { - Xu=(int16_t*)eNB->X_u_br[ce_level][preamble_offset-first_nonzero_root_idx]; - prach_ifft = prach_ifftp[prach_ifft_cnt++]; - if (eNB->prach_vars_br.repetition_number[ce_level]==1) memset(prach_ifft,0,((N_ZC==839)?2048:256)*sizeof(int32_t)); - } - else + Xu=(int16_t *)eNB->X_u_br[ce_level][preamble_offset-first_nonzero_root_idx]; + prach_ifft = prach_ifftp[prach_ifft_cnt++]; + + if (eNB->prach_vars_br.repetition_number[ce_level]==1) memset(prach_ifft,0,((N_ZC==839)?2048:256)*sizeof(int32_t)); + } else #endif - { - 
Xu=(int16_t*)eNB->X_u[preamble_offset-first_nonzero_root_idx]; - prach_ifft = prach_ifftp[0]; - memset(prach_ifft,0,((N_ZC==839) ? 2048 : 256)*sizeof(int32_t)); - } + { + Xu=(int16_t *)eNB->X_u[preamble_offset-first_nonzero_root_idx]; + prach_ifft = prach_ifftp[0]; + memset(prach_ifft,0,((N_ZC==839) ? 2048 : 256)*sizeof(int32_t)); + } memset(prachF, 0, sizeof(int16_t)*2*1024 ); - if (LOG_DUMPFLAG(PRACH)) { + + if (LOG_DUMPFLAG(PRACH)) { if (prach[0]!= NULL) LOG_M("prach_rx0.m","prach_rx0",prach[0],6144+792,1,1); - LOG_M("prach_rx1.m","prach_rx1",prach[1],6144+792,1,1); - LOG_M("prach_rxF0.m","prach_rxF0",rxsigF[0],24576,1,1); - LOG_M("prach_rxF1.m","prach_rxF1",rxsigF[1],6144,1,1); + + LOG_M("prach_rx1.m","prach_rx1",prach[1],6144+792,1,1); + LOG_M("prach_rxF0.m","prach_rxF0",rxsigF[0],24576,1,1); + LOG_M("prach_rxF1.m","prach_rxF1",rxsigF[1],6144,1,1); } - - for (aa=0;aa<nb_rx; aa++) { - // Do componentwise product with Xu* on each antenna - - k=0; - for (offset=0; offset<(N_ZC<<1); offset+=2) { - prachF[offset] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k] + (int32_t)Xu[offset+1]*rxsigF[aa][k+1])>>15); - prachF[offset+1] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k+1] - (int32_t)Xu[offset+1]*rxsigF[aa][k])>>15); - k+=2; - if (k==(12*2*fp->ofdm_symbol_size)) - k=0; - } - - // Now do IFFT of size 1024 (N_ZC=839) or 256 (N_ZC=139) - if (N_ZC == 839) { - log2_ifft_size = 10; - idft1024(prachF,prach_ifft_tmp,1); - // compute energy and accumulate over receive antennas and repetitions for BR - for (i=0;i<2048;i++) - prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[i<<1] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>10; - } else { - idft256(prachF,prach_ifft_tmp,1); - log2_ifft_size = 8; - // compute energy and accumulate over receive antennas and repetitions for BR - for (i=0;i<256;i++) - prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[(i<<1)] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>10; - } - - if (LOG_DUMPFLAG(PRACH)) { - if (aa==0) 
LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1); + + for (aa=0; aa<nb_rx; aa++) { + // Do componentwise product with Xu* on each antenna + k=0; + + for (offset=0; offset<(N_ZC<<1); offset+=2) { + prachF[offset] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k] + (int32_t)Xu[offset+1]*rxsigF[aa][k+1])>>15); + prachF[offset+1] = (int16_t)(((int32_t)Xu[offset]*rxsigF[aa][k+1] - (int32_t)Xu[offset+1]*rxsigF[aa][k])>>15); + k+=2; + + if (k==(12*2*fp->ofdm_symbol_size)) + k=0; + } + + // Now do IFFT of size 1024 (N_ZC=839) or 256 (N_ZC=139) + if (N_ZC == 839) { + log2_ifft_size = 10; + idft1024(prachF,prach_ifft_tmp,1); + + // compute energy and accumulate over receive antennas and repetitions for BR + for (i=0; i<2048; i++) + prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[i<<1] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>10; + } else { + idft256(prachF,prach_ifft_tmp,1); + log2_ifft_size = 8; + + // compute energy and accumulate over receive antennas and repetitions for BR + for (i=0; i<256; i++) + prach_ifft[i] += (prach_ifft_tmp[i<<1]*prach_ifft_tmp[(i<<1)] + prach_ifft_tmp[1+(i<<1)]*prach_ifft_tmp[1+(i<<1)])>>10; + } + + if (LOG_DUMPFLAG(PRACH)) { + if (aa==0) LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1); + if (aa==1) LOG_M("prach_rxF_comp1.m","prach_rxF_comp1",prachF,1024,1,1); } }// antennas_rx } // new dft - - // check energy in nth time shift, for + + // check energy in nth time shift, for #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) + if ((br_flag==0) || - (eNB->prach_vars_br.repetition_number[ce_level]== - eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level])) + (eNB->prach_vars_br.repetition_number[ce_level]== + eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[ce_level])) #endif - { - if (LOG_DEBUGFLAG(PRACH)){ - int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); - if (en>60) LOG_I(PHY,"frame %d, 
subframe %d: Checking for peak in time-domain (br_flag %d, en %d)\n",ru->proc.frame_prach,subframe,br_flag,en); + { + if (LOG_DEBUGFLAG(PRACH)) { + int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840)); + + if (en>60) LOG_I(PHY,"frame %d, subframe %d: Checking for peak in time-domain (br_flag %d, en %d)\n",ru->proc.frame_prach,subframe,br_flag,en); } - preamble_shift2 = ((preamble_shift==0) ? 0 : ((preamble_shift<<log2_ifft_size)/N_ZC)); - - - for (i=0; i<NCS2; i++) { - lev = (int32_t)prach_ifft[(preamble_shift2+i)]; - levdB = dB_fixed_times10(lev); - - if (levdB>*max_preamble_energy) { - *max_preamble_energy = levdB; - *max_preamble_delay = ((i*fft_size)>>log2_ifft_size)*update_TA/update_TA2; - *max_preamble = preamble_index; - if (LOG_DEBUGFLAG(PRACH)){ - int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); - if ((en>60) && (br_flag==1)) - LOG_D(PHY,"frame %d, subframe %d : max_preamble_energy %d, max_preamble_delay %d, max_preamble %d (br_flag %d,ce_level %d, levdB %d, lev %d)\n", - ru->proc.frame_prach,subframe, - *max_preamble_energy,*max_preamble_delay, - *max_preamble,br_flag,ce_level,levdB,lev); - } - } - } + preamble_shift2 = ((preamble_shift==0) ? 
0 : ((preamble_shift<<log2_ifft_size)/N_ZC)); + + for (i=0; i<NCS2; i++) { + lev = (int32_t)prach_ifft[(preamble_shift2+i)]; + levdB = dB_fixed_times10(lev); + + if (levdB>*max_preamble_energy) { + *max_preamble_energy = levdB; + *max_preamble_delay = ((i*fft_size)>>log2_ifft_size)*update_TA/update_TA2; + *max_preamble = preamble_index; + + if (LOG_DEBUGFLAG(PRACH)) { + int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840)); + + if ((en>60) && (br_flag==1)) + LOG_D(PHY,"frame %d, subframe %d : max_preamble_energy %d, max_preamble_delay %d, max_preamble %d (br_flag %d,ce_level %d, levdB %d, lev %d)\n", + ru->proc.frame_prach,subframe, + *max_preamble_energy,*max_preamble_delay, + *max_preamble,br_flag,ce_level,levdB,lev); + } + } } + } }// preamble_index if (LOG_DUMPFLAG(PRACH)) { - int en = dB_fixed(signal_energy((int32_t*)&rxsigF[0][0],840)); + int en = dB_fixed(signal_energy((int32_t *)&rxsigF[0][0],840)); + if (en>60) { k = (12*n_ra_prb) - 6*fp->N_RB_UL; - + if (k<0) k+=fp->ofdm_symbol_size; - + k*=12; k+=13; k*=2; - + if (br_flag == 0) { - LOG_M("rxsigF.m","prach_rxF",&rxsigF[0][0],12288,1,1); - LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1); - LOG_M("Xu.m","xu",Xu,N_ZC,1,1); - LOG_M("prach_ifft0.m","prach_t0",prach_ifft,1024,1,1); + LOG_M("rxsigF.m","prach_rxF",&rxsigF[0][0],12288,1,1); + LOG_M("prach_rxF_comp0.m","prach_rxF_comp0",prachF,1024,1,1); + LOG_M("Xu.m","xu",Xu,N_ZC,1,1); + LOG_M("prach_ifft0.m","prach_t0",prach_ifft,1024,1,1); + } else { + LOG_E(PHY,"Dumping prach (br_flag %d), k = %d (n_ra_prb %d)\n",br_flag,k,n_ra_prb); + LOG_M("rxsigF_br.m","prach_rxF_br",&rxsigF[0][0],12288,1,1); + LOG_M("prach_rxF_comp0_br.m","prach_rxF_comp0_br",prachF,1024,1,1); + LOG_M("Xu_br.m","xu_br",Xu,N_ZC,1,1); + LOG_M("prach_ifft0_br.m","prach_t0_br",prach_ifft,1024,1,1); + exit(-1); } - else { - LOG_E(PHY,"Dumping prach (br_flag %d), k = %d (n_ra_prb %d)\n",br_flag,k,n_ra_prb); - LOG_M("rxsigF_br.m","prach_rxF_br",&rxsigF[0][0],12288,1,1); - 
LOG_M("prach_rxF_comp0_br.m","prach_rxF_comp0_br",prachF,1024,1,1); - LOG_M("Xu_br.m","xu_br",Xu,N_ZC,1,1); - LOG_M("prach_ifft0_br.m","prach_t0_br",prach_ifft,1024,1,1); - exit(-1); - } - } } /* LOG_DUMPFLAG(PRACH) */ - if (eNB) stop_meas(&eNB->rx_prach); + if (eNB) stop_meas(&eNB->rx_prach); } #if (LTE_RRC_VERSION >= MAKE_VERSION(14, 0, 0)) void rx_prach(PHY_VARS_eNB *eNB, - RU_t *ru, - uint16_t *max_preamble, - uint16_t *max_preamble_energy, - uint16_t *max_preamble_delay, - uint16_t Nf, - uint8_t tdd_mapindex, - uint8_t br_flag) { - + RU_t *ru, + uint16_t *max_preamble, + uint16_t *max_preamble_energy, + uint16_t *max_preamble_delay, + uint16_t Nf, + uint8_t tdd_mapindex, + uint8_t br_flag) { int i; int prach_mask=0; - if (br_flag == 0) { + if (br_flag == 0) { rx_prach0(eNB,ru,max_preamble,max_preamble_energy,max_preamble_delay,Nf,tdd_mapindex,0,0); - } - else { // This is procedure for eMTC, basically handling the repetitions + } else { // This is procedure for eMTC, basically handling the repetitions prach_mask = is_prach_subframe(&eNB->frame_parms,eNB->proc.frame_prach_br,eNB->proc.subframe_prach_br); - for (i=0;i<4;i++) { - if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[i]==1) && - ((prach_mask&(1<<(i+1))) > 0)) { // check that prach CE level is active now - // if first reception in group of repetitions store frame for later (in RA-RNTI for Msg2) - if (eNB->prach_vars_br.repetition_number[i]==0) eNB->prach_vars_br.first_frame[i]=eNB->proc.frame_prach_br; + for (i=0; i<4; i++) { + if ((eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_CElevel_enable[i]==1) && + ((prach_mask&(1<<(i+1))) > 0)) { // check that prach CE level is active now - // increment repetition number - eNB->prach_vars_br.repetition_number[i]++; + // if first reception in group of repetitions store frame for later (in RA-RNTI for Msg2) + if (eNB->prach_vars_br.repetition_number[i]==0) 
eNB->prach_vars_br.first_frame[i]=eNB->proc.frame_prach_br; - // do basic PRACH reception - rx_prach0(eNB,ru,max_preamble,max_preamble_energy,max_preamble_delay,Nf,tdd_mapindex,1,i); - - // if last repetition, clear counter - if (eNB->prach_vars_br.repetition_number[i] == eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[i]) { - eNB->prach_vars_br.repetition_number[i]=0; + // increment repetition number + eNB->prach_vars_br.repetition_number[i]++; + // do basic PRACH reception + rx_prach0(eNB,ru,max_preamble,max_preamble_energy,max_preamble_delay,Nf,tdd_mapindex,1,i); - } + // if last repetition, clear counter + if (eNB->prach_vars_br.repetition_number[i] == eNB->frame_parms.prach_emtc_config_common.prach_ConfigInfo.prach_numRepetitionPerPreambleAttempt[i]) { + eNB->prach_vars_br.repetition_number[i]=0; + } } } } -- GitLab