Commit 8c5e8126 authored by Florian Kaltenberger's avatar Florian Kaltenberger

Added support for ARM NEON, lots of changes in openair1 and some in cmake_targets

git-svn-id: http://svn.eurecom.fr/openair4G/trunk@7543 818b1a75-f10b-46b9-bf7c-635c3b92a50f
parent 85ff3abc
......@@ -126,7 +126,7 @@ add_list_string_option(CMAKE_BUILD_TYPE "RelWithDebInfo" "Choose the type of bui
Message("Architecture is ${CMAKE_SYSTEM_PROCESSOR}")
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "-mfloat-abi=softfp -mfpu=neon")
set(C_FLAGS_PROCESSOR "-gdwarf-2 -mfloat-abi=hard -mfpu=neon -lgcc -lrt")
else (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7l")
set(C_FLAGS_PROCESSOR "-msse4.2")
endif()
......@@ -140,8 +140,8 @@ set(CMAKE_C_FLAGS
# set a flag for changes in the source code
# these changes are related to hardcoded path to include .h files
add_definitions(-DCMAKER)
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -ggdb -DMALLOC_CHECK_=3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -ggdb -DMALLOC_CHECK_=3 -O2")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS} -g -DMALLOC_CHECK_=3 -O2")
# Below has been put in comment because does not work with
# SVN authentication.
......@@ -778,7 +778,6 @@ set(PHY_SRC
${OPENAIR1_DIR}/PHY/TOOLS/log2_approx.c
${OPENAIR1_DIR}/PHY/TOOLS/cmult_sv.c
${OPENAIR1_DIR}/PHY/TOOLS/cmult_vv.c
${OPENAIR1_DIR}/PHY/TOOLS/cadd_vv.c
${OPENAIR1_DIR}/PHY/TOOLS/cdot_prod.c
${OPENAIR1_DIR}/PHY/TOOLS/signal_energy.c
${OPENAIR1_DIR}/PHY/TOOLS/dB_routines.c
......@@ -1692,7 +1691,7 @@ foreach(myExe dlsim ulsim pbchsim scansim mbmssim pdcchsim pucchsim prachsim syn
${XFORMS_SOURCE}
)
target_link_libraries (${myExe}
-Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS MSC ${ITTI_LIB} -Wl,--end-group
-Wl,--start-group SIMU UTIL SCHED_LIB PHY LFDS ${ITTI_LIB} -Wl,--end-group
pthread m rt ${CONFIG_LIBRARIES} ${ATLAS_LIBRARIES} ${XFORMS_LIBRARIES}
)
endforeach(myExe)
......
......@@ -10,5 +10,6 @@ set(RANDOM_BF False)
set(PBS_SIM False)
set(PERFECT_CE False)
set(NAS_UE False)
set(MESSAGE_CHART_GENERATOR False)
include(${CMAKE_CURRENT_SOURCE_DIR}/../CMakeLists.txt)
......@@ -31,8 +31,9 @@
author: raymond.knopp@eurecom.fr
date: 10.2009
*/
#include "defs.h"
//#include "lte_interleaver_inline.h"
#ifndef TC_MAIN
//#include "defs.h"
#endif
#include "extern_3GPPinterleaver.h"
......
This diff is collapsed.
TURBO_SRC = 3gpplte.c 3gpplte_turbo_decoder_sse.c crc_byte.c
TURBO_SRC = 3gpplte_sse.c 3gpplte_turbo_decoder_sse.c crc_byte.c
RATE13CC_SRC = ccoding_byte_lte.c viterbi_lte.c crc_byte.c
RATE12CC_SRC = ccoding_byte.c viterbi.c crc_byte.c
all: turbolte_test rate13cc_test rate12cc_test run_turbo run_rate13cc run_rate13ccdab run_rate12cc
all: 3gpplte_sse
turbolte_test: $(TURBO_SRC)
gcc -o turbo_test $(TURBO_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall
3gpplte_sse: $(TURBO_SRC)
gcc -o 3gpplte_sse 3gpplte_sse.c -msse4 -Wall -g -ggdb -DMAIN
rate13cc_test: $(RATE13CC_SRC)
gcc -o rate13cc_test $(RATE13CC_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall
rate12cc_test: $(RATE12CC_SRC)
gcc -o rate12cc_test $(RATE12CC_SRC) -DTEST_DEBUG -DUSER_MODE -msse2 -mssse3 -Wall
run_turbo: turbolte_test
./turbo_test
run_rate13cc: rate13cc_test
./rate13cc_test
run_rate13ccdab: rate13cc_test
./rate13cc_test -d
run_rate12cc: rate12cc_test
./rate12cc_test
clean:
rm *.o
......@@ -55,22 +55,22 @@ unsigned char ccodelte_table_rev[128]; // for receiver
void
ccodelte_encode (unsigned int numbits,
unsigned char add_crc,
unsigned char *inPtr,
unsigned char *outPtr,
unsigned short rnti)
ccodelte_encode (int32_t numbits,
uint8_t add_crc,
uint8_t *inPtr,
uint8_t *outPtr,
uint16_t rnti)
{
unsigned int state;
uint32_t state;
unsigned char c, out, first_bit;
char shiftbit=0;
unsigned short c16;
unsigned short next_last_byte=0;
unsigned int crc=0;
uint8_t c, out, first_bit;
int8_t shiftbit=0;
uint16_t c16;
uint16_t next_last_byte=0;
uint32_t crc=0;
#ifdef DEBUG_CCODE
unsigned int dummy=0;
uint32_t dummy=0;
#endif //DEBUG_CCODE
/* The input bit is shifted in position 8 of the state.
......@@ -80,20 +80,19 @@ ccodelte_encode (unsigned int numbits,
if (add_crc == 1) {
crc = crc8(inPtr,numbits);
first_bit = 2;
c = (unsigned char)(crc>>24);
c = (uint8_t)(crc>>24);
} else if (add_crc == 2) {
crc = crc16(inPtr,numbits);
#ifdef DEBUG_CCODE
printf("ccode_lte : crc %x\n",crc);
#endif
// scramble with RNTI
crc ^= (((unsigned int)rnti)<<16);
crc ^= (((uint32_t)rnti)<<16);
#ifdef DEBUG_CCODE
printf("ccode_lte : crc %x (rnti %x)\n",crc,rnti);
#endif
first_bit = 2;
// c = (unsigned char)(crc>>24);
c = (unsigned char)((crc>>16)&0xff);
c = (uint8_t)((crc>>16)&0xff);
} else {
next_last_byte = numbits>>3;
first_bit = (numbits-6)&7;
......@@ -182,7 +181,7 @@ ccodelte_encode (unsigned int numbits,
// now code 8-bit CRC for UCI
if (add_crc == 1) {
c = (unsigned char)(crc>>24);
c = (uint8_t)(crc>>24);
// for (shiftbit = 0; (shiftbit<8);shiftbit++) {
for (shiftbit = 7; (shiftbit>=0); shiftbit--) {
......@@ -209,7 +208,7 @@ ccodelte_encode (unsigned int numbits,
// now code 16-bit CRC for DCI
if (add_crc == 2) {
c16 = (unsigned short)(crc>>16);
c16 = (uint16_t)(crc>>16);
// for (shiftbit = 0; (shiftbit<16);shiftbit++) {
for (shiftbit = 15; (shiftbit>=0); shiftbit--) {
......
......@@ -320,7 +320,7 @@ void threegpplte_turbo_encoder(uint8_t *input,
uint16_t interleaver_f2);
/** \fn void ccodelte_encode(uint32_t numbits,uint8_t add_crc, uint8_t *inPtr,uint8_t *outPtr,uint16_t rnti)
/** \fn void ccodelte_encode(int32_t numbits,uint8_t add_crc, uint8_t *inPtr,uint8_t *outPtr,uint16_t rnti)
\brief This function implements the LTE convolutional code of rate 1/3
with a constraint length of 7 bits. The inputs are bit packed in octets
(from MSB to LSB). Trellis tail-biting is included here.
......@@ -331,7 +331,7 @@ void threegpplte_turbo_encoder(uint8_t *input,
@param rnti RNTI for CRC scrambling
*/
void
ccodelte_encode (uint32_t numbits,
ccodelte_encode (int32_t numbits,
uint8_t add_crc,
uint8_t *inPtr,
uint8_t *outPtr,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -299,11 +299,17 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms,
{
uint16_t re;
int16_t amp;
#if defined(__x86_64__) || defined(__i386__)
__m128i *ul_ch_mag128,*ul_ch_magb128,*rxdataF_comp128;
rxdataF_comp128 = (__m128i *)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_mag128 = (__m128i *)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_magb128 = (__m128i *)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
#elif defined(__arm__)
int16x8_t *ul_ch_mag128,*ul_ch_magb128,*rxdataF_comp128;
rxdataF_comp128 = (int16x8_t*)&rxdataF_comp[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_mag128 = (int16x8_t*)&ul_ch_mag[0][symbol*frame_parms->N_RB_DL*12];
ul_ch_magb128 = (int16x8_t*)&ul_ch_magb[0][symbol*frame_parms->N_RB_DL*12];
#endif
for (re=0; re<(Msc_RS>>2); re++) {
......@@ -313,15 +319,25 @@ void freq_equalization(LTE_DL_FRAME_PARMS *frame_parms,
amp=255;
// printf("freq_eq: symbol %d re %d => %d,%d,%d, (%d) (%d,%d) => ",symbol,re,*((int16_t*)(&ul_ch_mag128[re])),amp,inv_ch[8*amp],*((int16_t*)(&ul_ch_mag128[re]))*inv_ch[8*amp],*(int16_t*)&(rxdataF_comp128[re]),*(1+(int16_t*)&(rxdataF_comp128[re])));
#if defined(__x86_64__) || defined(__i386__)
rxdataF_comp128[re] = _mm_mullo_epi16(rxdataF_comp128[re],*((__m128i *)&inv_ch[8*amp]));
if (Qm==4)
ul_ch_mag128[re] = _mm_set1_epi16(324); // this is 512*2/sqrt(10)
ul_ch_mag128[re] = _mm_set1_epi16(324); // this is 512*2/sqrt(10)
else {
ul_ch_mag128[re] = _mm_set1_epi16(316); // this is 512*4/sqrt(42)
ul_ch_mag128[re] = _mm_set1_epi16(316); // this is 512*4/sqrt(42)
ul_ch_magb128[re] = _mm_set1_epi16(158); // this is 512*2/sqrt(42)
}
#elif defined(__arm__)
rxdataF_comp128[re] = vmulq_s16(rxdataF_comp128[re],*((int16x8_t *)&inv_ch[8*amp]));
if (Qm==4)
ul_ch_mag128[re] = vdupq_n_s16(324); // this is 512*2/sqrt(10)
else {
ul_ch_mag128[re] = vdupq_n_s16(316); // this is 512*4/sqrt(42)
ul_ch_magb128[re] = vdupq_n_s16(158); // this is 512*2/sqrt(42)
}
#endif
// printf("(%d,%d)\n",*(int16_t*)&(rxdataF_comp128[re]),*(1+(int16_t*)&(rxdataF_comp128[re])));
}
......
......@@ -35,7 +35,11 @@
#include "PHY/defs.h"
//#define DEBUG_PHY
#if defined(__x86_64__) || defined(__i386__)
__m128i avg128F;
#elif defined(__arm__)
int32x4_t avg128F;
#endif
//compute average channel_level on each (TX,RX) antenna pair
int dl_channel_level(int16_t *dl_ch,
......@@ -43,10 +47,15 @@ int dl_channel_level(int16_t *dl_ch,
{
int16_t rb;
#if defined(__x86_64__) || defined(__i386__)
__m128i *dl_ch128;
#elif defined(__arm__)
int16x4_t *dl_ch128;
#endif
int avg;
//clear average level
#if defined(__x86_64__) || defined(__i386__)
avg128F = _mm_setzero_si128();
dl_ch128=(__m128i *)dl_ch;
......@@ -59,7 +68,25 @@ int dl_channel_level(int16_t *dl_ch,
dl_ch128+=3;
}
#elif defined(__arm__)
avg128F = vdupq_n_s32(0);
dl_ch128=(int16x4_t *)dl_ch;
for (rb=0; rb<frame_parms->N_RB_DL; rb++) {
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[0],dl_ch128[0]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[1],dl_ch128[1]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[2],dl_ch128[2]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[3],dl_ch128[3]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[4],dl_ch128[4]));
avg128F = vqaddq_s32(avg128F,vmull_s16(dl_ch128[5],dl_ch128[5]));
dl_ch128+=6;
}
#endif
DevAssert( frame_parms->N_RB_DL );
avg = (((int*)&avg128F)[0] +
((int*)&avg128F)[1] +
......@@ -67,10 +94,10 @@ int dl_channel_level(int16_t *dl_ch,
((int*)&avg128F)[3])/(frame_parms->N_RB_DL*12);
#if defined(__x86_64__) || defined(__i386__)
_mm_empty();
_m_empty();
#endif
return(avg);
}
......
......@@ -46,14 +46,15 @@
//#include "defs.h"
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "pss6144.h"
#if defined(__x86_64__) || defined(__i386__)
#include "pss6144.h"
extern void print_shorts(char*,__m128i*);
#endif
void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq)
{
#if defined(__x86_64__) || defined(__i386__)
UE_SCAN_INFO_t *scan_info = &ue->scan_info[band];
int16_t spectrum[12288] __attribute__((aligned(16)));
int16_t spectrum_p5ms[12288] __attribute__((aligned(16)));
......@@ -358,5 +359,6 @@ void lte_sync_timefreq(PHY_VARS_UE *ue,int band,unsigned int DL_freq)
for (band_idx=0; band_idx<10; band_idx++)
printf("pss 2: level %d dB, freq %u\n", dB_fixed(scan_info->amp[2][band_idx]),scan_info->freq_offset_Hz[2][band_idx]);
#endif
}
......@@ -42,34 +42,26 @@
//#define DEBUG_MEAS
#ifdef USER_MODE
void print_shorts(char *s,__m128i *x)
void print_shorts(char *s,short *x)
{
short *tempb = (short *)x;
printf("%s : %d,%d,%d,%d,%d,%d,%d,%d\n",s,
tempb[0],tempb[1],tempb[2],tempb[3],tempb[4],tempb[5],tempb[6],tempb[7]
x[0],x[1],x[2],x[3],x[4],x[5],x[6],x[7]
);
}
void print_ints(char *s,__m128i *x)
void print_ints(char *s,int *x)
{
int *tempb = (int *)x;
printf("%s : %d,%d,%d,%d\n",s,
tempb[0],tempb[1],tempb[2],tempb[3]
x[0],x[1],x[2],x[3]
);
}
#endif
__m128i pmi128_re __attribute__ ((aligned(16)));
__m128i pmi128_im __attribute__ ((aligned(16)));
__m128i mmtmpPMI0 __attribute__ ((aligned(16)));
__m128i mmtmpPMI1 __attribute__ ((aligned(16)));
__m128i mmtmpPMI2 __attribute__ ((aligned(16)));
__m128i mmtmpPMI3 __attribute__ ((aligned(16)));
int16_t get_PL(uint8_t Mod_id,uint8_t CC_id,uint8_t eNB_index)
{
......@@ -421,7 +413,11 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
//int rx_power[NUMBER_OF_CONNECTED_eNB_MAX];
int i;
unsigned int limit,subband;
#if defined(__x86_64__) || defined(__i386__)
__m128i *dl_ch0_128,*dl_ch1_128;
#elif defined(__arm__)
int16x8_t *dl_ch0_128, *dl_ch1_128;
#endif
int *dl_ch0,*dl_ch1;
LTE_DL_FRAME_PARMS *frame_parms = &phy_vars_ue->lte_frame_parms;
int nb_subbands,subband_size,last_subband_size;
......@@ -605,26 +601,30 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
// skip the first 4 RE due to interpolation filter length of 5 (not possible to skip 5 due to 128i alignment, must be multiple of 128bit)
#if defined(__x86_64__) || defined(__i386__)
__m128i pmi128_re,pmi128_im,mmtmpPMI0,mmtmpPMI1,mmtmpPMI2,mmtmpPMI3;
dl_ch0_128 = (__m128i *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][aarx][4];
dl_ch1_128 = (__m128i *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][2+aarx][4];
#elif defined(__arm__)
int32x4_t pmi128_re,pmi128_im,mmtmpPMI0,mmtmpPMI1,mmtmpPMI0b,mmtmpPMI1b;
/*
#ifdef DEBUG_PHY
if(eNB_id==0){
print_shorts("Ch0",dl_ch0_128);
print_shorts("Ch1",dl_ch1_128);
printf("eNB_ID = %d\n",eNB_id);
}
#endif
*/
dl_ch0_128 = (int16x8_t *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][aarx][4];
dl_ch1_128 = (int16x8_t *)&phy_vars_ue->lte_ue_common_vars.dl_ch_estimates[eNB_id][2+aarx][4];
#endif
for (subband=0; subband<nb_subbands; subband++) {
// pmi
#if defined(__x86_64__) || defined(__i386__)
pmi128_re = _mm_setzero_si128();
pmi128_im = _mm_setzero_si128();
#elif defined(__arm__)
pmi128_re = vdupq_n_s32(0);
pmi128_im = vdupq_n_s32(0);
#endif
// limit is the number of groups of 4 REs in a subband (12 = 4 RBs, 3 = 1 RB)
// for 5 MHz channelization, there are 7 subbands, 6 of size 4 RBs and 1 of size 1 RB
if ((N_RB_DL==6) || (subband<(nb_subbands-1)))
......@@ -636,52 +636,33 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
// For each RE in subband perform ch0 * conj(ch1)
// multiply by conjugated channel
// if(eNB_id==0){
//print_shorts("ch0",dl_ch0_128);
//print_shorts("ch1",dl_ch1_128);
// }
// if(i==0){
mmtmpPMI0 = _mm_setzero_si128();
mmtmpPMI1 = _mm_setzero_si128();
// }
// if(eNB_id==0)
// print_ints("Pre_re",&mmtmpPMI0);
mmtmpPMI0 = _mm_madd_epi16(dl_ch0_128[0],dl_ch1_128[0]);
// if(eNB_id==0)
// print_ints("re",&mmtmpPMI0);
// mmtmpPMI0 contains real part of 4 consecutive outputs (32-bit)
// print_shorts("Ch1",dl_ch1_128);
#if defined(__x86_64__) || defined(__i386__)
mmtmpPMI1 = _mm_shufflelo_epi16(dl_ch1_128[0],_MM_SHUFFLE(2,3,0,1));//_MM_SHUFFLE(2,3,0,1)
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_shufflehi_epi16(mmtmpPMI1,_MM_SHUFFLE(2,3,0,1));
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_sign_epi16(mmtmpPMI1,*(__m128i*)&conjugate[0]);
// print_shorts("mmtmpPMI1:",&mmtmpPMI1);
mmtmpPMI1 = _mm_madd_epi16(mmtmpPMI1,dl_ch0_128[0]);
// if(eNB_id==0)
// print_ints("im",&mmtmpPMI1);
// mmtmpPMI1 contains imag part of 4 consecutive outputs (32-bit)
pmi128_re = _mm_add_epi32(pmi128_re,mmtmpPMI0);
pmi128_im = _mm_add_epi32(pmi128_im,mmtmpPMI1);
#elif defined(__arm__)
mmtmpPMI0 = vmull_s16(((int16x4_t*)dl_ch0_128)[0], ((int16x4_t*)dl_ch1_128)[0]);
mmtmpPMI1 = vmull_s16(((int16x4_t*)dl_ch0_128)[1], ((int16x4_t*)dl_ch1_128)[1]);
pmi128_re = vqaddq_s32(pmi128_re,vcombine_s32(vpadd_s32(vget_low_s32(mmtmpPMI0),vget_high_s32(mmtmpPMI0)),vpadd_s32(vget_low_s32(mmtmpPMI1),vget_high_s32(mmtmpPMI1))));
mmtmpPMI0b = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)dl_ch0_128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)dl_ch1_128)[0]);
mmtmpPMI1b = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)dl_ch0_128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)dl_ch1_128)[1]);
pmi128_im = vqaddq_s32(pmi128_im,vcombine_s32(vpadd_s32(vget_low_s32(mmtmpPMI0b),vget_high_s32(mmtmpPMI0b)),vpadd_s32(vget_low_s32(mmtmpPMI1b),vget_high_s32(mmtmpPMI1b))));
#endif
dl_ch0_128++;
dl_ch1_128++;
}
phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx] = (((int *)&pmi128_re)[0] + ((int *)&pmi128_re)[1] + ((int *)&pmi128_re)[2] + ((int *)&pmi128_re)[3])>>2;
// if(eNB_id==0)
// printf("in lte_ue_measurements.c: pmi_re %d\n",phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx]);
phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx] = (((int *)&pmi128_im)[0] + ((int *)&pmi128_im)[1] + ((int *)&pmi128_im)[2] + ((int *)&pmi128_im)[3])>>2;
// if(eNB_id==0)
// printf("in lte_ue_measurements.c: pmi_im %d\n",phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx]);
phy_vars_ue->PHY_measurements.wideband_pmi_re[eNB_id][aarx] += phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx];
phy_vars_ue->PHY_measurements.wideband_pmi_im[eNB_id][aarx] += phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx];
// msg("subband_pmi[%d][%d][%d] => (%d,%d)\n",eNB_id,subband,aarx,phy_vars_ue->PHY_measurements.subband_pmi_re[eNB_id][subband][aarx],phy_vars_ue->PHY_measurements.subband_pmi_im[eNB_id][subband][aarx]);
} // subband loop
} // rx antenna loop
} // if frame_parms->mode1_flag == 0
......@@ -742,9 +723,10 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
// printf("in lte_ue_measurements: selected rx_antenna[eNB_id==0]:%u\n", phy_vars_ue->PHY_measurements.selected_rx_antennas[eNB_id][i]);
} // eNB_id loop
#if defined(__x86_64__) || defined(__i386__)
_mm_empty();
_m_empty();
#endif
}
......
......@@ -106,9 +106,13 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
*temp_out_fft_1_ptr = (int32_t*)0,*out_fft_ptr_1 = (int32_t*)0,
*temp_in_ifft_ptr = (int32_t*)0;
#if defined(__x86_64__) || defined(__i386__)
__m128i *rxdataF128,*ul_ref128,*ul_ch128;
__m128i mmtmpU0,mmtmpU1,mmtmpU2,mmtmpU3;
#elif defined(__arm__)
int16x8_t *rxdataF128,*ul_ref128,*ul_ch128;
int32x4_t mmtmp0,mmtmp1,mmtmp_re,mmtmp_im;
#endif
Msc_RS = N_rb_alloc*12;
cyclic_shift = (frame_parms->pusch_config_common.ul_ReferenceSignalsPUSCH.cyclicShift +
......@@ -156,11 +160,18 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
for (aa=0; aa<nb_antennas_rx; aa++) {
// msg("Componentwise prod aa %d, symbol_offset %d,ul_ch_estimates %p,ul_ch_estimates[aa] %p,ul_ref_sigs_rx[0][0][Msc_RS_idx] %p\n",aa,symbol_offset,ul_ch_estimates,ul_ch_estimates[aa],ul_ref_sigs_rx[0][0][Msc_RS_idx]);
#if defined(__x86_64__) || defined(__i386__)
rxdataF128 = (__m128i *)&rxdataF_ext[aa][symbol_offset];
ul_ch128 = (__m128i *)&ul_ch_estimates[aa][symbol_offset];
ul_ref128 = (__m128i *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
#elif defined(__arm__)
rxdataF128 = (int16x8_t *)&rxdataF_ext[aa][symbol_offset];
ul_ch128 = (int16x8_t *)&ul_ch_estimates[aa][symbol_offset];
ul_ref128 = (int16x8_t *)ul_ref_sigs_rx[u][v][Msc_RS_idx];
#endif
for (i=0; i<Msc_RS/12; i++) {
#if defined(__x86_64__) || defined(__i386__)
// multiply by conjugated channel
mmtmpU0 = _mm_madd_epi16(ul_ref128[0],rxdataF128[0]);
// mmtmpU0 contains real part of 4 consecutive outputs (32-bit)
......@@ -204,7 +215,50 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
mmtmpU3 = _mm_unpackhi_epi32(mmtmpU0,mmtmpU1);
ul_ch128[2] = _mm_packs_epi32(mmtmpU2,mmtmpU3);
#elif defined(__arm__)
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
mmtmp0 = vmull_s16(((int16x4_t*)ul_ref128)[0],((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(((int16x4_t*)ul_ref128)[1],((int16x4_t*)rxdataF128)[1]);
mmtmp_re = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
mmtmp0 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[0],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[0]);
mmtmp1 = vmull_s16(vrev32_s16(vmul_s16(((int16x4_t*)ul_ref128)[1],*(int16x4_t*)conjugate)), ((int16x4_t*)rxdataF128)[1]);
mmtmp_im = vcombine_s32(vpadd_s32(vget_low_s32(mmtmp0),vget_high_s32(mmtmp0)),
vpadd_s32(vget_low_s32(mmtmp1),vget_high_s32(mmtmp1)));
ul_ch128[0] = vcombine_s16(vmovn_s32(mmtmp_re),vmovn_s32(mmtmp_im));
ul_ch128++;
ul_ref128++;
rxdataF128++;
#endif
ul_ch128+=3;
ul_ref128+=3;
rxdataF128+=3;
......@@ -538,17 +592,17 @@ int32_t lte_ul_channel_estimation(PHY_VARS_eNB *phy_vars_eNB,
// msg("sym: %d, current_phase1: %d, ru: %d + j%d, current_phase2: %d, ru: %d + j%d\n",k,current_phase1,ru1[2*current_phase1],ru1[2*current_phase1+1],current_phase2,ru2[2*current_phase2],ru2[2*current_phase2+1]);
// rotate channel estimates by estimated phase
rotate_cpx_vector_norep((int16_t*) ul_ch1,
&ru1[2*current_phase1],
(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
Msc_RS,
15);
rotate_cpx_vector_norep((int16_t*) ul_ch2,
&ru2[2*current_phase2],
(int16_t*) &tmp_estimates[0],
Msc_RS,
15);
rotate_cpx_vector((int16_t*) ul_ch1,
&ru1[2*current_phase1],
(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],
Msc_RS,
15);
rotate_cpx_vector((int16_t*) ul_ch2,
&ru2[2*current_phase2],
(int16_t*) &tmp_estimates[0],
Msc_RS,
15);
// Combine the two rotated estimates
multadd_complex_vector_real_scalar((int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],SCALE,(int16_t*) &ul_ch_estimates[aa][frame_parms->N_RB_UL*12*k],1,Msc_RS);
......@@ -664,11 +718,11 @@ int32_t lte_srs_channel_estimation(LTE_DL_FRAME_PARMS *frame_parms,
//write_output("eNb_rxF.m","rxF",&eNb_common_vars->rxdataF[0][aa][2*frame_parms->ofdm_symbol_size*symbol],2*(frame_parms->ofdm_symbol_size),2,1);
//write_output("eNb_srs.m","srs_eNb",eNb_common_vars->srs,(frame_parms->ofdm_symbol_size),1,1);
mult_cpx_vector_norep((int16_t*) &eNb_common_vars->rxdataF[eNb_id][aa][2*frame_parms->ofdm_symbol_size*symbol],
(int16_t*) eNb_srs_vars->srs,
(int16_t*) eNb_srs_vars->srs_ch_estimates[eNb_id][aa],
frame_parms->ofdm_symbol_size,
15);
mult_cpx_conj_vector((int16_t*) &eNb_common_vars->rxdataF[eNb_id][aa][2*frame_parms->ofdm_symbol_size*symbol],
(int16_t*) eNb_srs_vars->srs,
(int16_t*) eNb_srs_vars->srs_ch_estimates[eNb_id][aa],
frame_parms->ofdm_symbol_size,
15);
//msg("SRS channel estimation cmult out\n");
#ifdef USER_MODE
......@@ -695,6 +749,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms,
uint16_t nb_rb)
{
#if defined(__x86_64__) || defined(__i386__)
int k, rb;
int a_idx = 64;
uint8_t conj_flag = 0;
......@@ -830,4 +885,7 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms,
phase_idx = -phase_idx;
return(phase_idx);
#elif defined(__arm__)
return(0);
#endif
}
......@@ -94,9 +94,9 @@ int lte_dl_cell_spec_SS(PHY_VARS_eNB *phy_vars_eNB,
output[k] = qpsk[(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3];
//output[k] = (lte_gold_table[eNB_offset][Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3;
#ifdef DEBUG_DL_CELL_SPEC
debug_msg("Ns %d, l %d, m %d,mprime_dword %d, mprime_qpsk_symbol %d\n",
msg("Ns %d, l %d, m %d,mprime_dword %d, mprime_qpsk_symbol %d\n",
Ns,l,m,mprime_dword,mprime_qpsk_symb);
debug_msg("index = %d (k %d)\n",(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3,k);
msg("index = %d (k %d)\n",(phy_vars_eNB->lte_gold_table[Ns][l][mprime_dword]>>(2*mprime_qpsk_symb))&3,k);
#endif
mprime++;
......
This diff is collapsed.
......@@ -139,7 +139,7 @@ typedef struct {
/// Concatenated "e"-sequences (for definition see 36-212 V8.6 2009-03, p.17-18)
uint8_t e[MAX_NUM_CHANNEL_BITS];
/// Turbo-code outputs (36-212 V8.6 2009-03, p.12
uint8_t d[MAX_NUM_DLSCH_SEGMENTS][(96+3+(3*6144))];
uint8_t *d[MAX_NUM_DLSCH_SEGMENTS];//[(96+3+(3*6144))];
/// Sub-block interleaver outputs (36-212 V8.6 2009-03, p.16-17)
uint8_t w[MAX_NUM_DLSCH_SEGMENTS][3*6144];
/// Number of code segments (for definition see 36-212 V8.6 2009-03, p.9)
......
......@@ -100,6 +100,10 @@ void free_eNB_dlsch(LTE_eNB_DLSCH_t *dlsch)
free16(dlsch->harq_processes[i]->c[r],((r==0)?8:0) + 3+768);
dlsch->harq_processes[i]->c[r] = NULL;
}
if (dlsch->harq_processes[i]->d[r]) {
free16(dlsch->harq_processes[i]->d[r],(96+3+(3*6144)));
dlsch->harq_processes[i]->d[r] = NULL;
}
}
free16(dlsch->harq_processes[i],sizeof(LTE_DL_eNB_HARQ_t));