Commit b4635ac4 authored by knopp's avatar knopp

Clean-up and bug-fixes from S. Held (IMST)

git-svn-id: http://svn.eurecom.fr/openair4G/trunk@6420 818b1a75-f10b-46b9-bf7c-635c3b92a50f
parent e531a4f3
......@@ -567,6 +567,7 @@ WARN_LOGFILE =
INPUT = ../../PHY/impl_defs_top.h \
../../PHY/impl_defs_lte.h \
../../PHY/defs.h \
../../PHY/sse_intrin.h \
../../PHY/CODING/defs.h \
../../PHY/TOOLS/defs.h \
../../PHY/LTE_TRANSPORT/defs.h \
......
......@@ -36,7 +36,7 @@
#include "extern_3GPPinterleaver.h"
#include <stdlib.h>
#include "smmintrin.h"
#include "PHY/sse_intrin.h"
//#define DEBUG_TURBO_ENCODER 1
#define CALLGRIND 1
......
......@@ -34,10 +34,10 @@
*/
#include "emmintrin.h"
#include "PHY/defs.h"
#include "PHY/CODING/defs.h"
#include "PHY/CODING/lte_interleaver_inline.h"
#include "PHY/sse_intrin.h"
/*
// 3gpp2 polynomials
......
......@@ -42,10 +42,9 @@
///
///
#include "emmintrin.h"
#include "smmintrin.h"
#include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG
#ifndef TEST_DEBUG
#include "PHY/defs.h"
#include "PHY/CODING/defs.h"
#include "PHY/CODING/lte_interleaver_inline.h"
......
......@@ -44,8 +44,7 @@
///
///
#include "emmintrin.h"
#include "smmintrin.h"
#include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG
#include "PHY/defs.h"
......
......@@ -43,8 +43,7 @@
///
///
#include "emmintrin.h"
#include "smmintrin.h"
#include "PHY/sse_intrin.h"
#ifndef TEST_DEBUG
#include "PHY/defs.h"
......
......@@ -55,7 +55,7 @@
#define F1 3
#define F2 10
#include "emmintrin.h"
#include "PHY/sse_intrin.h"
#define sgn(a) (((a)<0) ? 0 : 1)
......
......@@ -34,7 +34,7 @@
#ifndef EXPRESSMIMO_TARGET
#include "emmintrin.h"
#include "PHY/sse_intrin.h"
#endif //EXPRESSMIMO_TARGET
extern unsigned char ccodedot11_table[128],ccodedot11_table_rev[128];
......@@ -212,9 +212,9 @@ void phy_viterbi_dot11_sse2(char *y,unsigned char *decoded_bytes,unsigned short
// set initial metrics
metrics0_15 = _mm_cvtsi32_si128(INIT0);
metrics16_31 = _mm_xor_si128(metrics16_31,metrics16_31);
metrics32_47 = _mm_xor_si128(metrics32_47,metrics32_47);
metrics48_63 = _mm_xor_si128(metrics32_47,metrics32_47);
metrics16_31 = _mm_setzero_si128();
metrics32_47 = _mm_setzero_si128();
metrics48_63 = _mm_setzero_si128();
}
rescale = _mm_cvtsi32_si128(RESCALE);
......
......@@ -50,7 +50,7 @@
#endif
#ifndef EXPRESSMIMO_TARGET
#include "emmintrin.h"
#include "PHY/sse_intrin.h"
#endif //EXPRESSMIMO_TARGET
extern uint8_t ccodelte_table[128],ccodelte_table_rev[128];
......@@ -163,10 +163,10 @@ void phy_viterbi_lte_sse2(int8_t *y,uint8_t *decoded_bytes,uint16_t n) {
// set initial metrics
//debug_msg("Doing viterbi\n");
metrics0_15 = _mm_xor_si128(metrics0_15,metrics0_15);
metrics16_31 = _mm_xor_si128(metrics16_31,metrics16_31);
metrics32_47 = _mm_xor_si128(metrics32_47,metrics32_47);
metrics48_63 = _mm_xor_si128(metrics32_47,metrics32_47);
metrics0_15 = _mm_setzero_si128();
metrics16_31 = _mm_setzero_si128();
metrics32_47 = _mm_setzero_si128();
metrics48_63 = _mm_setzero_si128();
#ifndef USER_MODE
//debug_msg("Doing viterbi 2\n");
#endif
......
......@@ -28,8 +28,7 @@
*******************************************************************************/
#include "PHY/defs.h"
#include "PHY/extern.h"
#include <emmintrin.h>
#include <xmmintrin.h>
#include "PHY/sse_intrin.h"
// This is 512/(1:256) in __m128i format
int16_t inv_ch[256*8] = {512,512,512,512,512,512,512,512,
......
......@@ -29,13 +29,8 @@
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "emmintrin.h"
#include "PHY/sse_intrin.h"
#ifdef __SSE3__
#include "pmmintrin.h"
#include "tmmintrin.h"
#else
#endif
//#define k1 1000
#define k1 1024
......
......@@ -46,7 +46,7 @@ int dl_channel_level(int16_t *dl_ch,
int avg;
//clear average level
avg128F = _mm_xor_si128(avg128F,avg128F);
avg128F = _mm_setzero_si128();
dl_ch128=(__m128i *)dl_ch;
for (rb=0;rb<frame_parms->N_RB_DL;rb++) {
......
......@@ -33,17 +33,7 @@
#include "SCHED/defs.h"
#include "SCHED/extern.h"
#include "log.h"
#include "emmintrin.h"
#ifdef __SSE3__
#include "pmmintrin.h"
#include "tmmintrin.h"
#else
__m128i zeroPMI;
#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zeroPMI,(xmmx)))
#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zeroPMI,(xmmy)))
#endif
#include "PHY/sse_intrin.h"
//#define k1 1000
#define k1 ((long long int) 1000)
......@@ -489,8 +479,8 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
// pmi
pmi128_re = _mm_xor_si128(pmi128_re,pmi128_re);
pmi128_im = _mm_xor_si128(pmi128_im,pmi128_im);
pmi128_re = _mm_setzero_si128();
pmi128_im = _mm_setzero_si128();
// limit is the number of groups of 4 REs in a subband (12 = 4 RBs, 3 = 1 RB)
// for 5 MHz channelization, there are 7 subbands, 6 of size 4 RBs and 1 of size 1 RB
if ((N_RB_DL==6) || (subband<(nb_subbands-1)))
......@@ -507,8 +497,8 @@ void lte_ue_measurements(PHY_VARS_UE *phy_vars_ue,
//print_shorts("ch1",dl_ch1_128);
// }
// if(i==0){
mmtmpPMI0 = _mm_xor_si128(mmtmpPMI0,mmtmpPMI0);
mmtmpPMI1 = _mm_xor_si128(mmtmpPMI1,mmtmpPMI1);
mmtmpPMI0 = _mm_setzero_si128();
mmtmpPMI1 = _mm_setzero_si128();
// }
// if(eNB_id==0)
// print_ints("Pre_re",&mmtmpPMI0);
......
......@@ -28,16 +28,9 @@
*******************************************************************************/
#include "PHY/defs.h"
#include "PHY/extern.h"
#include <emmintrin.h>
#include <xmmintrin.h>
#include "PHY/sse_intrin.h"
//#define DEBUG_CH
#ifndef __SSE3__
__m128i zeroE;//,tmp_over_sqrt_10,tmp_sum_4_over_sqrt_10,tmp_sign,tmp_sign_3_over_sqrt_10;
//#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmx)))
#define _mm_abs_epi16(xmmx) _mm_add_epi16(_mm_xor_si128((xmmx),_mm_cmpgt_epi16(zeroE,(xmmx))),_mm_srli_epi16(_mm_cmpgt_epi16(zeroE,(xmmx)),15))
#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zeroE,(xmmy)))
#endif
// For Channel Estimation in Distributed Alamouti Scheme
//static int16_t temp_out_ifft[2048*4] __attribute__((aligned(16)));
......@@ -696,8 +689,8 @@ int16_t lte_ul_freq_offset_estimation(LTE_DL_FRAME_PARMS *frame_parms,
int16_t alpha[128] = {201, 402, 603, 804, 1006, 1207, 1408, 1610, 1811, 2013, 2215, 2417, 2619, 2822, 3024, 3227, 3431, 3634, 3838, 4042, 4246, 4450, 4655, 4861, 5066, 5272, 5479, 5686, 5893, 6101, 6309, 6518, 6727, 6937, 7147, 7358, 7570, 7782, 7995, 8208, 8422, 8637, 8852, 9068, 9285, 9503, 9721, 9940, 10160, 10381, 10603, 10825, 11049, 11273, 11498, 11725, 11952, 12180, 12410, 12640, 12872, 13104, 13338, 13573, 13809, 14046, 14285, 14525, 14766, 15009, 15253, 15498, 15745, 15993, 16243, 16494, 16747, 17001, 17257, 17515, 17774, 18035, 18298, 18563, 18829, 19098, 19368, 19640, 19915, 20191, 20470, 20750, 21033, 21318, 21605, 21895, 22187, 22481, 22778, 23078, 23380, 23685, 23992, 24302, 24615, 24931, 25250, 25572, 25897, 26226, 26557, 26892, 27230, 27572, 27917, 28266, 28618, 28975, 29335, 29699, 30067, 30440, 30817, 31198, 31583, 31973, 32368, 32767};
// compute log2_maxh (output_shift)
avg128U1 = _mm_xor_si128(avg128U1,avg128U1);
avg128U2 = _mm_xor_si128(avg128U2,avg128U2);
avg128U1 = _mm_setzero_si128();
avg128U2 = _mm_setzero_si128();
for (rb=0;rb<nb_rb;rb++) {
avg128U1 = _mm_add_epi32(avg128U1,_mm_madd_epi16(ul_ch1[0],ul_ch1[0]));
......
......@@ -46,12 +46,7 @@
#include "PHY/extern.h"
#include "SCHED/defs.h"
#include "SIMULATION/TOOLS/defs.h" // for taus
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef __SSE3__
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#include "PHY/sse_intrin.h"
#include "assertions.h"
......@@ -61,12 +56,6 @@
//#undef ALL_AGGREGATION
#ifndef __SSE3__
__m128i zero2;
#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero2,(xmmx)))
#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero2,(xmmy)))
#endif
//extern uint16_t phich_reg[MAX_NUM_PHICH_GROUPS][3];
//extern uint16_t pcfich_reg[4];
......@@ -636,7 +625,7 @@ void pdcch_channel_level(int32_t **dl_ch_estimates_ext,
for (aatx=0;aatx<frame_parms->nb_antennas_tx_eNB;aatx++)
for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) {
//clear average level
avg128P = _mm_xor_si128(avg128P,avg128P);
avg128P = _mm_setzero_si128();
dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][frame_parms->N_RB_DL*12];
for (rb=0;rb<nb_rb;rb++) {
......@@ -1272,9 +1261,6 @@ void pdcch_channel_compensation(int32_t **rxdataF_ext,
#ifndef __SSE3__
zero2 = _mm_xor_si128(zero2,zero2);
#endif
#ifdef DEBUG_DCI_DECODING
msg("[PHY] PDCCH comp: symbol %d\n",symbol);
......
......@@ -38,26 +38,13 @@
* \warning
*/
#ifdef __SSE2__
#include <emmintrin.h>
#include <xmmintrin.h>
#endif
#ifdef __SSE3__
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "defs.h"
#include "extern.h"
#include "PHY/sse_intrin.h"
#ifndef __SSE3__
__m128i zero;//,tmp_over_sqrt_10,tmp_sum_4_over_sqrt_10,tmp_sign,tmp_sign_3_over_sqrt_10;
//#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmx)))
#define _mm_abs_epi16(xmmx) _mm_add_epi16(_mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmx))),_mm_srli_epi16(_mm_cmpgt_epi16(zero,(xmmx)),15))
#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmy)))
#endif
#ifndef USER_MODE
#define NOCYGWIN_STATIC static
......@@ -110,7 +97,7 @@ int rx_pdsch(PHY_VARS_UE *phy_vars_ue,
unsigned char aatx,aarx;
unsigned short nb_rb;
int avgs, rb;
LTE_DL_UE_HARQ_t *dlsch0_harq,*dlsch1_harq;
LTE_DL_UE_HARQ_t *dlsch0_harq,*dlsch1_harq = 0;
switch (type) {
case SI_PDSCH:
......@@ -800,10 +787,6 @@ void dlsch_channel_compensation(int **rxdataF_ext,
symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
#ifndef __SSE3__
zero = _mm_xor_si128(zero,zero);
#endif
if ((symbol_mod == 0) || (symbol_mod == (4-frame_parms->Ncp))) {
if (frame_parms->mode1_flag==1) // 10 out of 12 so don't reduce size
......@@ -815,7 +798,7 @@ void dlsch_channel_compensation(int **rxdataF_ext,
for (aatx=0;aatx<frame_parms->nb_antennas_tx_eNB;aatx++) {
if (mod_order == 4) {
QAM_amp128 = _mm_set1_epi16(QAM16_n1); // 2/sqrt(10)
QAM_amp128b = _mm_xor_si128(QAM_amp128b,QAM_amp128b);
QAM_amp128b = _mm_setzero_si128();
}
else if (mod_order == 6) {
QAM_amp128 = _mm_set1_epi16(QAM64_n1); //
......@@ -1180,15 +1163,11 @@ void dlsch_channel_compensation_TM56(int **rxdataF_ext,
rx_power_correction = 1;
#ifndef __SSE3__
zero = _mm_xor_si128(zero,zero);
#endif
//printf("comp prec: symbol %d, pilots %d\n",symbol, pilots);
if (mod_order == 4) {
QAM_amp128 = _mm_set1_epi16(QAM16_n1);
QAM_amp128b = _mm_xor_si128(QAM_amp128b,QAM_amp128b);
QAM_amp128b = _mm_setzero_si128();
}
else if (mod_order == 6) {
QAM_amp128 = _mm_set1_epi16(QAM64_n1);
......@@ -1396,15 +1375,11 @@ void dlsch_channel_compensation_TM3(LTE_DL_FRAME_PARMS *frame_parms,
rx_power_correction = 1;
#ifndef __SSE3__
zero = _mm_xor_si128(zero,zero);
#endif
//printf("comp prec: symbol %d, pilots %d\n",symbol, pilots);
if (mod_order0 == 4) {
QAM_amp0_128 = _mm_set1_epi16(QAM16_n1);
QAM_amp0_128b = _mm_xor_si128(QAM_amp0_128b,QAM_amp0_128b);
QAM_amp0_128b = _mm_setzero_si128();
}
else if (mod_order0 == 6) {
QAM_amp0_128 = _mm_set1_epi16(QAM64_n1);
......@@ -1412,7 +1387,7 @@ void dlsch_channel_compensation_TM3(LTE_DL_FRAME_PARMS *frame_parms,
}
if (mod_order1 == 4) {
QAM_amp1_128 = _mm_set1_epi16(QAM16_n1);
QAM_amp1_128b = _mm_xor_si128(QAM_amp1_128b,QAM_amp1_128b);
QAM_amp1_128b = _mm_setzero_si128();
}
else if (mod_order1 == 6) {
QAM_amp1_128 = _mm_set1_epi16(QAM64_n1);
......@@ -1949,7 +1924,7 @@ void dlsch_channel_level(int **dl_ch_estimates_ext,
for (aatx=0;aatx<frame_parms->nb_antennas_tx_eNB;aatx++)
for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) {
//clear average level
avg128D = _mm_xor_si128(avg128D,avg128D);
avg128D = _mm_setzero_si128();
// 5 is always a symbol with no pilots for both normal and extended prefix
dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol*frame_parms->N_RB_DL*12];
......@@ -2008,7 +1983,7 @@ void dlsch_channel_level_TM3(int **dl_ch_estimates_ext,
symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
//clear average level
avg128D = _mm_xor_si128(avg128D,avg128D);
avg128D = _mm_setzero_si128();
avg[0] = 0;
avg[1] = 0;
// 5 is always a symbol with no pilots for both normal and extended prefix
......@@ -2083,7 +2058,7 @@ void dlsch_channel_level_TM56(int **dl_ch_estimates_ext,
symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
//clear average level
avg128D = _mm_xor_si128(avg128D,avg128D);
avg128D = _mm_setzero_si128();
avg[0] = 0;
avg[1] = 0;
// 5 is always a symbol with no pilots for both normal and extended prefix
......
......@@ -38,27 +38,11 @@
* \warning
*/
#ifdef __SSE2__
#include <emmintrin.h>
#include <xmmintrin.h>
#endif
#ifdef __SSE3__
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "defs.h"
#include "extern.h"
#ifndef __SSE3__
//#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmx)))
//abs_#define _mm_abs_epi16(xmmx) _mm_add_epi16(_mm_xor_si128((xmmx),_mm_cmpgt_epi16((*(__m128i*)&zero[0]),(xmmx))),_mm_srli_epi16(_mm_cmpgt_epi16((*(__m128i*)&zero[0]),(xmmx)),15))
//#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmy)))
#endif
#include "PHY/sse_intrin.h"
#ifndef USER_MODE
#define NOCYGWIN_STATIC static
......@@ -1061,7 +1045,7 @@ length = number of resource elements
// Compute the terms for the LLR of first bit
xmm0 = _mm_xor_si128(xmm0,xmm0); // ZERO
xmm0 = _mm_setzero_si128(); // ZERO
// 1 term for numerator of LLR
xmm3 = _mm_subs_epi16(y1r_over2,rho_rpi);
......@@ -1305,7 +1289,7 @@ length = number of resource elements
y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
xmm0 = _mm_xor_si128(xmm0,xmm0); // ZERO
xmm0 = _mm_setzero_si128(); // ZERO
// compute psi
xmm3 = _mm_subs_epi16(y1r,rho_rpi);
......@@ -1569,7 +1553,7 @@ length = number of resource elements
y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
xmm0 = _mm_xor_si128(xmm0,xmm0); // ZERO
xmm0 = _mm_setzero_si128(); // ZERO
// compute psi
xmm3 = _mm_subs_epi16(y1r,rho_rpi);
......@@ -1780,7 +1764,7 @@ void qam16_qpsk(short *stream0_in,
y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
xmm0 = _mm_xor_si128(xmm0,xmm0); // ZERO
xmm0 = _mm_setzero_si128(); // ZERO
xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r
psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r|
......@@ -2237,7 +2221,7 @@ void qam16_qam16(short *stream0_in,
y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
xmm0 = _mm_xor_si128(xmm0,xmm0); // ZERO
xmm0 = _mm_setzero_si128(); // ZERO
xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r
psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r|
......@@ -2748,7 +2732,7 @@ void qam16_qam64(short *stream0_in,
y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)]
y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
xmm0 = _mm_xor_si128(xmm0,xmm0); // ZERO
xmm0 = _mm_setzero_si128(); // ZERO
xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r
psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r|
......@@ -3357,7 +3341,7 @@ void qam64_qpsk(short *stream0_in,
y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
// Psi_r calculation from rho_rpi or rho_rmi
xmm0 = _mm_xor_si128(xmm0, xmm0); // ZERO for abs_pi16
xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16
xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); psi_r_p7_p7 = _mm_abs_epi16(xmm2);
xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); psi_r_p7_p5 = _mm_abs_epi16(xmm2);
xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); psi_r_p7_p3 = _mm_abs_epi16(xmm2);
......@@ -4669,7 +4653,7 @@ void qam64_qam16(short *stream0_in,
y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
// Psi_r calculation from rho_rpi or rho_rmi
xmm0 = _mm_xor_si128(xmm0, xmm0); // ZERO for abs_pi16
xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16
xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); psi_r_p7_p7 = _mm_abs_epi16(xmm2);
xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); psi_r_p7_p5 = _mm_abs_epi16(xmm2);
xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); psi_r_p7_p3 = _mm_abs_epi16(xmm2);
......@@ -5924,7 +5908,7 @@ void qam64_qam64(short *stream0_in,
y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)]
// Psi_r calculation from rho_rpi or rho_rmi
xmm0 = _mm_xor_si128(xmm0, xmm0); // ZERO for abs_pi16
xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16
xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); psi_r_p7_p7 = _mm_abs_epi16(xmm2);
xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); psi_r_p7_p5 = _mm_abs_epi16(xmm2);
xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); psi_r_p7_p3 = _mm_abs_epi16(xmm2);
......
......@@ -1016,7 +1016,7 @@ int dlsch_modulation(mod_sym_t **txdataF,
uint8_t pilots=0;
uint8_t skip_dc,skip_half;
uint8_t mod_order0 = get_Qm(dlsch0_harq->mcs);
uint8_t mod_order1;
uint8_t mod_order1 = 0;
int16_t amp_rho_a, amp_rho_b;
int16_t qam16_table_a0[4],qam64_table_a0[8],qam16_table_b0[4],qam64_table_b0[8];
int16_t qam16_table_a1[4],qam64_table_a1[8],qam16_table_b1[4],qam64_table_b1[8];
......
......@@ -39,8 +39,7 @@
*/
#include "PHY/defs.h"
#include "PHY/extern.h"
#include <emmintrin.h>
#include <xmmintrin.h>
#include "PHY/sse_intrin.h"
//#define DEBUG_DRS
int generate_drs_pusch(PHY_VARS_UE *phy_vars_ue,
......
......@@ -37,28 +37,18 @@
* \note
* \warning
*/
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef __SSE3__
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#include "PHY/defs.h"
#include "PHY/CODING/extern.h"
#include "PHY/CODING/lte_interleaver_inline.h"
#include "defs.h"
#include "extern.h"
#include "PHY/extern.h"
#include "PHY/sse_intrin.h"
#ifdef PHY_ABSTRACTION
#include "SIMULATION/TOOLS/defs.h"
#endif
#ifndef __SSE3__
extern __m128i zero;
#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmx)))
#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmy)))
#endif
//#define DEBUG_PBCH 1
//#define DEBUG_PBCH_ENCODING
......@@ -524,7 +514,7 @@ int pbch_channel_level(int **dl_ch_estimates_ext,
for (aatx=0;aatx<4;aatx++) //frame_parms->nb_antennas_tx_eNB;aatx++)
for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) {
//clear average level
avg128 = _mm_xor_si128(avg128,avg128);
avg128 = _mm_setzero_si128();
dl_ch128=(__m128i *)&dl_ch_estimates_ext[(aatx<<1)+aarx][symbol_mod*6*12];
for (rb=0;rb<nb_rb;rb++) {
......
......@@ -28,6 +28,7 @@
*******************************************************************************/
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "PHY/sse_intrin.h"
// Mask for identifying subframe for MBMS
#define MBSFN_TDD_SF3 0x80// for TDD
......@@ -46,14 +47,6 @@
#define MBSFN_FDD_SF8 0x04
#ifndef __SSE3__
#warning SSE3 instruction set not preset
__m128i zeroM;//,tmp_over_sqrt_10,tmp_sum_4_over_sqrt_10,tmp_sign,tmp_sign_3_over_sqrt_10;
//#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zero,(xmmx)))
#define _mm_abs_epi16(xmmx) _mm_add_epi16(_mm_xor_si128((xmmx),_mm_cmpgt_epi16(zeroM,(xmmx))),_mm_srli_epi16(_mm_cmpgt_epi16(zeroM,(xmmx)),15))
#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zeroM,(xmmy)))
#endif
void dump_mch(PHY_VARS_UE *phy_vars_ue,uint8_t eNB_id,uint16_t coded_bits_per_codeword,int subframe) {
......@@ -375,7 +368,7 @@ void mch_channel_level(int **dl_ch_estimates_ext,
for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) {
//clear average level
avg128 = _mm_xor_si128(avg128,avg128);
avg128 = _mm_setzero_si128();
// 5 is always a symbol with no pilots for both normal and extended prefix
dl_ch128=(__m128i *)&dl_ch_estimates_ext[aarx][symbol*frame_parms->N_RB_DL*12];
......
......@@ -37,14 +37,7 @@
* \note
* \warning
*/
#ifdef __SSE2__
#include <emmintrin.h>
#include <xmmintrin.h>
#endif
#ifdef __SSE3__
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#include "PHY/sse_intrin.h"
#include "PHY/defs.h"
#include "PHY/extern.h"
//#include "prach.h"
......@@ -960,8 +953,14 @@ void rx_prach(PHY_VARS_eNB *phy_vars_eNB,uint8_t subframe,uint16_t *preamble_ene
(numshift>0) ? (not_found = 0) : (preamble_offset++);
}
}
preamble_shift = -((d_start * (preamble_index0/n_shift_ra)) + ((preamble_index0%n_shift_ra)*NCS)); // minus because the channel is h(t -\tau + Cv)
(preamble_shift < 0) ? (preamble_shift+=N_ZC) : preamble_shift;
if (n_shift_ra>0)
preamble_shift = -((d_start * (preamble_index0/n_shift_ra)) + ((preamble_index0%n_shift_ra)*NCS)); // minus because the channel is h(t -\tau + Cv)
else
preamble_shift = 0;
if (preamble_shift < 0)
preamble_shift+=N_ZC;
preamble_index0++;
if (preamble_index == 0)
first_nonzero_root_idx = preamble_offset;
......
......@@ -354,7 +354,7 @@ int rx_sss(PHY_VARS_UE *phy_vars_ue,int32_t *tot_metric,uint8_t *flip_max,uint8_
sss0 = (int16_t*)&sss0_ext[0][5];
sss5 = (int16_t*)&sss5_ext[0][5];
for (flip=0;flip<2;flip++) { // d0/d5 flip in RX frame
for (phase=0;phase<=7;phase++) { // phase offset between PSS and SSS
for (phase=0;phase<7;phase++) { // phase offset between PSS and SSS
for (Nid1 = 0 ; Nid1 <= 167; Nid1++) { // 168 possible Nid1 values
metric = 0;
if (flip==0) {
......
......@@ -38,15 +38,6 @@
* \warning
*/
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
#ifdef __SSE3__
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#include "PHY/defs.h"
#include "PHY/extern.h"
#include "MAC_INTERFACE/defs.h"
......@@ -54,16 +45,12 @@
#include "defs.h"
#include "extern.h"
//#define DEBUG_ULSCH
#include "PHY/sse_intrin.h"
//extern char* namepointer_chMag ;
//eren
//extern int **ulchmag_eren;
//eren
#ifndef __SSE3__
__m128i zeroU;
#define _mm_abs_epi16(xmmx) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zeroU,(xmmx)))
#define _mm_sign_epi16(xmmx,xmmy) _mm_xor_si128((xmmx),_mm_cmpgt_epi16(zeroU,(xmmy)))
#endif
static short jitter[8] __attribute__ ((aligned(16))) = {1,0,0,1,0,1,1,0};
......@@ -502,7 +489,6 @@ void ulsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
mmtmpU2 = _mm_abs_epi16(mmtmpU1);
mmtmpU2 = _mm_subs_epi16(ch_magb[i],mmtmpU2);
#ifdef __SSE4_1__
(*llrp32)[0] = _mm_extract_epi32(rxF[i],0);
(*llrp32)[1] = _mm_extract_epi32(mmtmpU1,0);
(*llrp32)[2] = _mm_extract_epi32(mmtmpU2,0);
......@@ -515,7 +501,6 @@ void ulsch_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms,
(*llrp32)[9] = _mm_extract_epi32(rxF[i],3);
(*llrp32)[10] = _mm_extract_epi32(mmtmpU1,3);
(*llrp32)[11] = _mm_extract_epi32(mmtmpU2,3);
#endif
(*llrp32)+=12;
}
......@@ -661,10 +646,6 @@ void ulsch_channel_compensation(int32_t **rxdataF_ext,
#endif
// symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
#ifndef __SSE3__
zeroU = _mm_xor_si128(zeroU,zeroU);
#endif
// printf("comp: symbol %d\n",symbol);
#ifdef ULSCH_OFDMA
......@@ -862,10 +843,6 @@ void ulsch_channel_compensation_alamouti(int32_t **rxdataF_ext,
// symbol_mod = (symbol>=(7-frame_parms->Ncp)) ? symbol-(7-frame_parms->Ncp) : symbol;
#ifndef __SSE3__
zeroU = _mm_xor_si128(zeroU,zeroU);
#endif
// printf("comp: symbol %d\n",symbol);
......@@ -1219,7 +1196,7 @@ void ulsch_channel_level(int32_t **drs_ch_estimates_ext,
for (aarx=0;aarx<frame_parms->nb_antennas_rx;aarx++) {
//clear average level
avg128U = _mm_xor_si128(avg128U,avg128U);