Commit f1e30d57 authored by Florian Kaltenberger's avatar Florian Kaltenberger

Merge branch 'ocp_nrsimulator' into 'develop-nr'

Critical bugs from compilation, Ocp nrsimulator

See merge request !512
parents f7583ef4 165c39da
Pipeline #11688 failed with stage
in 0 seconds
This diff is collapsed.
......@@ -43,10 +43,10 @@ conf_nvram_path=$OPENAIR_DIR/openair3/NAS/TOOLS/ue_eurecom_test_sfr.conf
MSC_GEN=0
XFORMS="True"
UE_EXPANSION="False"
UESIM_EXPANSION="False"
PRINT_STATS="False"
VCD_TIMING="False"
DEADLINE_SCHEDULER_FLAG_USER="False"
FORCE_DEADLINE_SCHEDULER_FLAG_USER=""
DEADLINE_SCHEDULER_FLAG_USER=""
CPU_AFFINITY_FLAG_USER="False" #Only valid when lowlatecy flag is set to False
REL="Rel15"
NR_REL="NR_Rel15"
......@@ -78,7 +78,7 @@ trap handle_ctrl_c INT
function print_help() {
echo_info "
This program installs OpenAirInterface Software
You should have ubuntu 14.xx, updated, and the Linux kernel >= 3.14
You should have ubuntu 16.xx or 18.04 updated
Options
-h
This help
......@@ -116,7 +116,7 @@ Options
Rel8 limits the implementation to 3GPP Release 8 version
Rel10 limits the implementation to 3GPP Release 10 version
-w | --hardware
EXMIMO, USRP, BLADERF, ETHERNET, LMSSDR, ADRV9371_ZC706, None (Default)
EXMIMO, USRP, BLADERF, ETHERNET, LMSSDR, ADRV9371_ZC706, SIMU, None (Default)
Adds this RF board support (in external packages installation and in compilation)
-t | --transport protocol
ETHERNET , None
......@@ -175,6 +175,9 @@ Options
--basic-simulator
Generates a basic [1 UE + 1 eNB + no channel] simulator.
See targets/ARCH/tcp_bridge/README.tcp_bridge_oai for documentation.
--rfsimulator
Generate virtual RF driver
to use it, set the environement variable RFSIMULATOR to \"enb\" in the eNB and to the eNB IP address in the UEs
Usage (first build):
NI/ETTUS B201 + COTS UE : ./build_oai -I --eNB -x --install-system-files -w USRP
Usage (Regular):
......@@ -250,7 +253,7 @@ function main() {
-w | --hardware)
HW="$2" #"${i#*=}"
# Use OAI_USRP as the key word USRP is used inside UHD driver
if [ "$HW" != "BLADERF" -a "$HW" != "USRP" -a "$HW" != "LMSSDR" -a "$HW" != "None" -a "$HW" != "EXMIMO" -a "$HW" != "ADRV9371_ZC706" ] ; then
if [ "$HW" != "BLADERF" -a "$HW" != "USRP" -a "$HW" != "LMSSDR" -a "$HW" != "None" -a "$HW" != "EXMIMO" -a "$HW" != "ADRV9371_ZC706" -a "$HW" != "SIMU" ] ; then
echo_fatal "Unknown HW type $HW will exit..."
else
if [ "$HW" == "USRP" ] ; then
......@@ -265,6 +268,9 @@ function main() {
if [ "$HW" == "LMSSDR" ] ; then
HW="OAI_LMSSDR"
fi
if [ "$HW" == "SIMU" ] ; then
HW="OAI_SIMU"
fi
echo_info "Setting hardware to: $HW"
fi
shift 2;;
......@@ -331,13 +337,13 @@ function main() {
BUILD_DOXYGEN=1
echo_info "Will build doxygen support"
shift;;
--disable-deadline)
FORCE_DEADLINE_SCHEDULER_FLAG_USER="False"
echo_info "Disabling the usage of deadline scheduler"
shift 1;;
--enable-deadline)
FORCE_DEADLINE_SCHEDULER_FLAG_USER="True"
echo_info "Enabling the usage of deadline scheduler"
--disable-deadline)
DEADLINE_SCHEDULER_FLAG_USER="False"
echo_info "Disabling the usage of deadline scheduler"
shift 1;;
--enable-deadline)
DEADLINE_SCHEDULER_FLAG_USER="True"
echo_info "Enabling the usage of deadline scheduler"
shift 1;;
--enable-cpu-affinity)
CPU_AFFINITY_FLAG_USER="True"
......@@ -400,7 +406,11 @@ function main() {
BASIC_SIMULATOR=1
echo_info "Compiling the basic simulator"
shift 1;;
-h | --help)
--rfsimulator)
RFSIMULATOR=true
echo_info "Compiling the RF simulator"
shift 1;;
-h | --help)
print_help
exit 1;;
*)
......@@ -441,33 +451,18 @@ function main() {
fi
fi
echo_info "RF HW set to $HW"
#Now we set flags to enable deadline scheduler settings
#By default: USRP: disable,
#By default: BLADERF: enable,
#By default: EXMIMO: enable
if [ "$FORCE_DEADLINE_SCHEDULER_FLAG_USER" = "" ]; then
if [ "$HW" = "EXMIMO" ] ; then
DEADLINE_SCHEDULER_FLAG_USER="True"
elif [ "$HW" = "ETHERNET" ] ; then
DEADLINE_SCHEDULER_FLAG_USER="False"
elif [ "$HW" = "OAI_USRP" ] ; then
DEADLINE_SCHEDULER_FLAG_USER="False"
elif [ "$HW" = "OAI_ADRV9371_ZC706" ] ; then
DEADLINE_SCHEDULER_FLAG_USER="False"
elif [ "$HW" = "OAI_BLADERF" ] ; then
DEADLINE_SCHEDULER_FLAG_USER="False"
elif [ "$HW" = "OAI_LMSSDR" ] ; then
DEADLINE_SCHEDULER_FLAG_USER="False"
elif [ "$HW" = "None" ] ; then
DEADLINE_SCHEDULER_FLAG_USER="False"
else
echo_error "Unknown HW type $HW. Exiting now..."
exit
fi
else
DEADLINE_SCHEDULER_FLAG_USER=$FORCE_DEADLINE_SCHEDULER_FLAG_USER
fi
echo_info "RF HW set to $HW"
# If the user doesn't specify the Linux scheduler to use, we set a value
if [ "$DEADLINE_SCHEDULER_FLAG_USER" = "" ]; then
case "$HW" in
"EXMIMO")
DEADLINE_SCHEDULER_FLAG_USER="True"
;;
*)
DEADLINE_SCHEDULER_FLAG_USER="False"
;;
esac
fi
#Disable CPU Affinity for deadline scheduler
if [ "$DEADLINE_SCHEDULER_FLAG_USER" = "True" ] ; then
......
#!/bin/bash
GENERATED_FULL_DIR=$1
shift
ASN1_SOURCE_DIR=$1
shift
export ASN1C_PREFIX=$1
shift
options=$*
done_flag="$GENERATED_FULL_DIR"/done
if [ "$done_flag" -ot $ASN1_SOURCE_DIR ] ; then
rm -f "$GENERATED_FULL_DIR"/${ASN1C_PREFIX}*.c "$GENERATED_FULL_DIR"/${ASN1C_PREFIX}*.h
mkdir -p "$GENERATED_FULL_DIR"
asn1c -pdu=all -fcompound-names -gen-PER -no-gen-OER -no-gen-example $options -D $GENERATED_FULL_DIR $ASN1_SOURCE_DIR |& egrep -v "^Copied|^Compiled" | sort -u
fi
touch $done_flag
......@@ -23,9 +23,16 @@
#ifndef BACKTRACE_H_
#define BACKTRACE_H_
#ifdef __cplusplus
extern "C" {
#endif
void display_backtrace(void);
void backtrace_handle_signal(siginfo_t *info);
#ifdef __cplusplus
}
#endif
#endif /* BACKTRACE_H_ */
......@@ -919,7 +919,7 @@ void init_nr_ue_transport(PHY_VARS_NR_UE *ue,int abstraction_flag) {
for (i=0; i<NUMBER_OF_CONNECTED_eNB_MAX; i++) {
for (j=0; j<2; j++) {
for (k=0; k<2; k++) {
for (k=0; k<RX_NB_TH_MAX; k++) {
AssertFatal((ue->dlsch[k][i][j] = new_nr_ue_dlsch(1,NUMBER_OF_HARQ_PID_MAX,NSOFT,MAX_LDPC_ITERATIONS,ue->frame_parms.N_RB_DL, abstraction_flag))!=NULL,"Can't get ue dlsch structures\n");
LOG_D(PHY,"dlsch[%d][%d][%d] => %p\n",k,i,j,ue->dlsch[i][j]);
......
......@@ -421,6 +421,7 @@ void nr_pdcch_extract_rbs_single(int32_t **rxdataF,
#ifdef NR_PDCCH_DCI_DEBUG
printf("\t\t<-NR_PDCCH_DCI_DEBUG (nr_pdcch_extract_rbs_single)-> c_rb=%d\n",c_rb);
#endif
rxF=NULL;
// first we set initial conditions for pointer to rxdataF depending on the situation of the first RB within the CORESET (c_rb = n_BWP_start)
if ((c_rb < (frame_parms->N_RB_DL >> 1)) && ((frame_parms->N_RB_DL & 1) == 0)) {
//if RB to be treated is lower than middle system bandwidth then rxdataF pointed at (offset + c_br + symbol * ofdm_symbol_size): even case
......
......@@ -516,7 +516,8 @@ uint32_t nr_dlsch_decoding(PHY_VARS_NR_UE *phy_vars_ue,
p_nrLDPC_procBuf[r],
p_procTime);
if (check_crc(llrProcBuf,harq_process->B,harq_process->F,crc_type)) {
// Fixme: correct type is unsigned, but nrLDPC_decoder and all called behind use signed int
if (check_crc((uint8_t*)llrProcBuf,harq_process->B,harq_process->F,crc_type)) {
printf("CRC OK\n");
ret = 2;
}
......
......@@ -1350,7 +1350,7 @@ void nr_dlsch_channel_compensation(int **rxdataF_ext,
unsigned char aatx,aarx,pilots=0;
__m128i *dl_ch128,*dl_ch128_2,*dl_ch_mag128,*dl_ch_mag128b,*rxdataF128,*rxdataF_comp128,*rho128;
__m128i mmtmpD0,mmtmpD1,mmtmpD2,mmtmpD3,QAM_amp128,QAM_amp128b;
QAM_amp128b = _mm_setzero_si128();
if (symbol == 2){
pilots=1;
......
......@@ -266,123 +266,33 @@ void nr_pbch_channel_compensation(int **rxdataF_ext,
int **rxdataF_comp,
NR_DL_FRAME_PARMS *frame_parms,
uint32_t symbol,
uint8_t output_shift)
{
short conjugate[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1};
//short conjugate2[8]__attribute__((aligned(16))) = {1,-1,1,-1,1,-1,1,-1};
#if defined(__x86_64__) || defined(__i386__)
__m128i mmtmpP0,mmtmpP1,mmtmpP2,mmtmpP3;
#elif defined(__arm__)
int16x8_t mmtmpP0,mmtmpP1,mmtmpP2,mmtmpP3;
#endif
uint16_t nb_re=180;
uint8_t aarx;
#if defined(__x86_64__) || defined(__i386__)
__m128i *dl_ch128,*rxdataF128,*rxdataF_comp128;
#elif defined(__arm__)
#endif
uint8_t output_shift) {
const uint16_t nb_re=symbol == 2 ? 72 : 180;
AssertFatal((symbol > 0 && symbol < 4),
"symbol %d is illegal for PBCH DM-RS\n",
symbol);
if (symbol == 2) nb_re = 72;
// printf("comp: symbol %d : nb_re %d\n",symbol,nb_re);
for (aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
for (int aarx=0; aarx<frame_parms->nb_antennas_rx; aarx++) {
#if defined(__x86_64__) || defined(__i386__)
dl_ch128 = (__m128i *)&dl_ch_estimates_ext[aarx][symbol*20*12];
rxdataF128 = (__m128i *)&rxdataF_ext[aarx][symbol*20*12];
rxdataF_comp128 = (__m128i *)&rxdataF_comp[aarx][symbol*20*12];
vect128 *dl_ch128 = (vect128 *)&dl_ch_estimates_ext[aarx][symbol*20*12];
vect128 *rxdataF128 = (vect128 *)&rxdataF_ext[aarx][symbol*20*12];
vect128 *rxdataF_comp128 = (vect128 *)&rxdataF_comp[aarx][symbol*20*12];
/*
printf("ch compensation dl_ch ext addr %p \n", &dl_ch_estimates_ext[aarx][symbol*20*12]);
printf("rxdataf ext addr %p symbol %d\n", &rxdataF_ext[aarx][symbol*20*12], symbol);
printf("rxdataf_comp addr %p\n",&rxdataF_comp[aarx][symbol*20*12]);
*/
#elif defined(__arm__)
// to be filled in
#endif
for (int re=0; re<nb_re; re+=12) {
// printf("******re %d\n",re);
#if defined(__x86_64__) || defined(__i386__)
// multiply by conjugated channel
mmtmpP0 = _mm_madd_epi16(dl_ch128[0],rxdataF128[0]);
// print_ints("re",&mmtmpP0);
// mmtmpP0 contains real part of 4 consecutive outputs (32-bit)
mmtmpP1 = _mm_shufflelo_epi16(dl_ch128[0],_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_shufflehi_epi16(mmtmpP1,_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_sign_epi16(mmtmpP1,*(__m128i*)&conjugate[0]);
// print_ints("im",&mmtmpP1);
mmtmpP1 = _mm_madd_epi16(mmtmpP1,rxdataF128[0]);
// mmtmpP1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpP0 = _mm_srai_epi32(mmtmpP0,output_shift);
// print_ints("re(shift)",&mmtmpP0);
mmtmpP1 = _mm_srai_epi32(mmtmpP1,output_shift);
// print_ints("im(shift)",&mmtmpP1);
mmtmpP2 = _mm_unpacklo_epi32(mmtmpP0,mmtmpP1);
mmtmpP3 = _mm_unpackhi_epi32(mmtmpP0,mmtmpP1);
// print_ints("c0",&mmtmpP2);
// print_ints("c1",&mmtmpP3);
rxdataF_comp128[0] = _mm_packs_epi32(mmtmpP2,mmtmpP3);
/*
print_shorts("rx:",rxdataF128);
print_shorts("ch:",dl_ch128);
print_shorts("pack:",rxdataF_comp128);
*/
// multiply by conjugated channel
mmtmpP0 = _mm_madd_epi16(dl_ch128[1],rxdataF128[1]);
// mmtmpP0 contains real part of 4 consecutive outputs (32-bit)
mmtmpP1 = _mm_shufflelo_epi16(dl_ch128[1],_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_shufflehi_epi16(mmtmpP1,_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_sign_epi16(mmtmpP1,*(__m128i*)&conjugate[0]);
mmtmpP1 = _mm_madd_epi16(mmtmpP1,rxdataF128[1]);
// mmtmpP1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpP0 = _mm_srai_epi32(mmtmpP0,output_shift);
mmtmpP1 = _mm_srai_epi32(mmtmpP1,output_shift);
mmtmpP2 = _mm_unpacklo_epi32(mmtmpP0,mmtmpP1);
mmtmpP3 = _mm_unpackhi_epi32(mmtmpP0,mmtmpP1);
rxdataF_comp128[1] = _mm_packs_epi32(mmtmpP2,mmtmpP3);
// print_shorts("rx:",rxdataF128+1);
// print_shorts("ch:",dl_ch128+1);
// print_shorts("pack:",rxdataF_comp128+1);
// multiply by conjugated channel
mmtmpP0 = _mm_madd_epi16(dl_ch128[2],rxdataF128[2]);
// mmtmpP0 contains real part of 4 consecutive outputs (32-bit)
mmtmpP1 = _mm_shufflelo_epi16(dl_ch128[2],_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_shufflehi_epi16(mmtmpP1,_MM_SHUFFLE(2,3,0,1));
mmtmpP1 = _mm_sign_epi16(mmtmpP1,*(__m128i*)&conjugate[0]);
mmtmpP1 = _mm_madd_epi16(mmtmpP1,rxdataF128[2]);
// mmtmpP1 contains imag part of 4 consecutive outputs (32-bit)
mmtmpP0 = _mm_srai_epi32(mmtmpP0,output_shift);
mmtmpP1 = _mm_srai_epi32(mmtmpP1,output_shift);
mmtmpP2 = _mm_unpacklo_epi32(mmtmpP0,mmtmpP1);
mmtmpP3 = _mm_unpackhi_epi32(mmtmpP0,mmtmpP1);
rxdataF_comp128[2] = _mm_packs_epi32(mmtmpP2,mmtmpP3);
// print_shorts("rx:",rxdataF128+2);
// print_shorts("ch:",dl_ch128+2);
// print_shorts("pack:",rxdataF_comp128+2);
dl_ch128+=3;
rxdataF128+=3;
rxdataF_comp128+=3;
#elif defined(__arm__)
// to be filled in
#endif
*rxdataF_comp128++ = mulByConjugate128(rxdataF128++, dl_ch128++, output_shift);
*rxdataF_comp128++ = mulByConjugate128(rxdataF128++, dl_ch128++, output_shift);
*rxdataF_comp128++ = mulByConjugate128(rxdataF128++, dl_ch128++, output_shift);
}
}
#if defined(__x86_64__) || defined(__i386__)
_mm_empty();
_m_empty();
#endif
}
void nr_pbch_detection_mrc(NR_DL_FRAME_PARMS *frame_parms,
......
......@@ -577,7 +577,7 @@ int32_t generate_nr_prach( PHY_VARS_NR_UE *ue, uint8_t eNB_id, uint8_t subframe,
uint8_t preamble_index = ue->prach_resources[eNB_id]->ra_PreambleIndex;
//uint8_t tdd_mapindex = ue->prach_resources[eNB_id]->ra_TDD_map_index;
int16_t *prachF = ue->prach_vars[eNB_id]->prachF;
static int16_t prach_tmp[45600*2] __attribute__((aligned(32)));
static int16_t prach_tmp[45600*4] __attribute__((aligned(32)));
int16_t *prach = prach_tmp;
int16_t *prach2;
int16_t amp = ue->prach_vars[eNB_id]->amp;
......
This diff is collapsed.
......@@ -399,7 +399,7 @@ typedef struct RU_t_s{
/// function pointer to release function for radio interface
int (*stop_rf)(struct RU_t_s *ru);
/// function pointer to initialization function for radio interface
int (*start_if)(struct RU_t_s *ru,struct PHY_VARS_eNB_s *eNB);
int (*start_if)(struct RU_t_s *ru,struct PHY_VARS_gNB_s *gNB);
/// function pointer to RX front-end processing routine (DFTs/prefix removal or NULL)
void (*feprx)(struct RU_t_s *ru);
/// function pointer to TX front-end processing routine (IDFTs and prefix removal or NULL)
......
......@@ -1021,11 +1021,13 @@ typedef struct {
NR_UE_COMMON common_vars;
nr_ue_if_module_t *if_inst;
nr_downlink_indication_t dl_indication;
nr_uplink_indication_t ul_indication;
/// UE FAPI DCI request
nr_dcireq_t dcireq;
// CHECK if we need those as they are also included in dl_indictation
/// UE FAPI indication for DLSCH reception
fapi_nr_rx_indication_t rx_ind;
/// UE FAPI indication for DCI reception
......
......@@ -26,11 +26,11 @@
* The host CPU needs to have support for SSE2 at least. SSE3 and SSE4.1 functions are emulated if the CPU lacks support for them.
* This will slow down the softmodem, but may be valuable if only offline signal processing is required.
*
* \author S. Held
* \email sebastian.held@imst.de
* \company IMST GmbH
* \date 2015
* \version 0.1
* \author S. Held, Laurent THOMAS
* \email sebastian.held@imst.de, laurent.thomas@open-cells.com
* \company IMST GmbH, Open Cells Project
* \date 2019
* \version 0.2
*/
#ifndef SSE_INTRIN_H
......@@ -40,23 +40,23 @@
#if defined(__x86_64) || defined(__i386__)
#ifndef __SSE2__
# error SSE2 processor intrinsics disabled
# error SSE2 processor intrinsics disabled
#endif
#include <emmintrin.h>
#include <xmmintrin.h>
#ifdef __SSE3__
# include <pmmintrin.h>
# include <tmmintrin.h>
#include <pmmintrin.h>
#include <tmmintrin.h>
#endif
#ifdef __SSE4_1__
# include <smmintrin.h>
#include <smmintrin.h>
#endif
#ifdef __AVX2__
# include <immintrin.h>
#include <immintrin.h>
#endif
// ------------------------------------------------
......@@ -108,8 +108,7 @@ typedef union {
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_comge_epi8_SSE2(__m128i a, __m128i b)
{
static inline __m128i ssp_comge_epi8_SSE2(__m128i a, __m128i b) {
__m128i c;
c = _mm_cmpgt_epi8( a, b );
a = _mm_cmpeq_epi8( a, b );
......@@ -126,13 +125,11 @@ static inline __m128i ssp_comge_epi8_SSE2(__m128i a, __m128i b)
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask)
{
static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask) {
ssp_m128 A,B, MASK, maskZero;
A.i = a;
maskZero.i = ssp_comge_epi8_SSE2( mask, _mm_setzero_si128() );
MASK.i = _mm_and_si128 ( mask, _mm_set1_epi8( (char)0x0F) );
B.s8[ 0] = A.s8[ (MASK.s8[ 0]) ];
B.s8[ 1] = A.s8[ (MASK.s8[ 1]) ];
B.s8[ 2] = A.s8[ (MASK.s8[ 2]) ];
......@@ -149,7 +146,6 @@ static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask)
B.s8[13] = A.s8[ (MASK.s8[13]) ];
B.s8[14] = A.s8[ (MASK.s8[14]) ];
B.s8[15] = A.s8[ (MASK.s8[15]) ];
B.i = _mm_and_si128( B.i, maskZero.i );
return B.i;
}
......@@ -182,8 +178,7 @@ static inline __m128i ssp_shuffle_epi8_SSE2 (__m128i a, __m128i mask)
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx )
{
static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx ) {
ssp_m128 Ahi, Alo;
b = b & 0xFF; /* Convert to 8-bit integer */
Ahi.i = _mm_unpackhi_epi8( a, _mm_setzero_si128() ); /* Ahi = a_8[8:15] Simulate 8bit integers as 16-bit integers */
......@@ -191,72 +186,71 @@ static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx )
/* Insert b as a 16-bit integer to upper or lower half of a */
switch( ndx & 0xF ) {
case 0:
Alo.i = _mm_insert_epi16( Alo.i, b, 0 );
break;
case 0:
Alo.i = _mm_insert_epi16( Alo.i, b, 0 );
break;
case 1:
Alo.i = _mm_insert_epi16( Alo.i, b, 1 );
break;
case 1:
Alo.i = _mm_insert_epi16( Alo.i, b, 1 );
break;
case 2:
Alo.i = _mm_insert_epi16( Alo.i, b, 2 );
break;
case 2:
Alo.i = _mm_insert_epi16( Alo.i, b, 2 );
break;
case 3:
Alo.i = _mm_insert_epi16( Alo.i, b, 3 );
break;
case 3:
Alo.i = _mm_insert_epi16( Alo.i, b, 3 );
break;
case 4:
Alo.i = _mm_insert_epi16( Alo.i, b, 4 );
break;
case 4:
Alo.i = _mm_insert_epi16( Alo.i, b, 4 );
break;
case 5:
Alo.i = _mm_insert_epi16( Alo.i, b, 5 );
break;
case 5:
Alo.i = _mm_insert_epi16( Alo.i, b, 5 );
break;
case 6:
Alo.i = _mm_insert_epi16( Alo.i, b, 6 );
break;
case 6:
Alo.i = _mm_insert_epi16( Alo.i, b, 6 );
break;
case 7:
Alo.i = _mm_insert_epi16( Alo.i, b, 7 );
break;
case 7:
Alo.i = _mm_insert_epi16( Alo.i, b, 7 );
break;
case 8:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 0 );
break;
case 8:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 0 );
break;
case 9:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 1 );
break;
case 9:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 1 );
break;
case 10:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 2 );
break;
case 10:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 2 );
break;
case 11:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 3 );
break;
case 11:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 3 );
break;
case 12:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 4 );
break;
case 12:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 4 );
break;
case 13:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 5 );
break;
case 13:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 5 );
break;
case 14:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 6 );
break;
case 14:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 6 );
break;
default:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 7 );
default:
Ahi.i = _mm_insert_epi16( Ahi.i, b, 7 );
}
return _mm_packus_epi16( Alo.i, Ahi.i ); // Pack the 16-bit integers to 8bit again.
///* Another implementation, but slower: */
//ssp_m128 A, B, mask;
//mask.i = _mm_setzero_si128();
......@@ -277,16 +271,13 @@ static inline __m128i ssp_insert_epi8_SSE2( __m128i a, int b, const int ndx )
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_cvtepi8_epi16_SSE2 ( __m128i a)
{
static inline __m128i ssp_cvtepi8_epi16_SSE2 ( __m128i a) {
__m128i b = _mm_setzero_si128 ();
__m128i c = _mm_unpacklo_epi8(a, b);
__m128i d = _mm_set1_epi16 (128);
b = _mm_and_si128(d, c);
d = _mm_set1_epi16(0x1FE);
b = _mm_mullo_epi16(b, d);
return _mm_add_epi16(c, b);
}
......@@ -299,8 +290,7 @@ static inline __m128i ssp_cvtepi8_epi16_SSE2 ( __m128i a)
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m128i mask ) // Bitwise (mask ? a : b)
{
static inline __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m128i mask ) { // Bitwise (mask ? a : b)
a = _mm_and_si128 ( a, mask ); // clear a where mask = 0
b = _mm_andnot_si128( mask, b ); // clear b where mask = 1
a = _mm_or_si128 ( a, b ); // a = a OR b
......@@ -316,8 +306,7 @@ static inline __m128i ssp_logical_bitwise_select_SSE2( __m128i a, __m128i b, __m
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_max_epi8_SSE2( __m128i a, __m128i b )
{
static inline __m128i ssp_max_epi8_SSE2( __m128i a, __m128i b ) {
__m128i mask = _mm_cmpgt_epi8( a, b ); // FFFFFFFF where a > b
a = ssp_logical_bitwise_select_SSE2( a, b, mask );
return a;
......@@ -332,15 +321,12 @@ static inline __m128i ssp_max_epi8_SSE2( __m128i a, __m128i b )
* \date 2006-2008
* \copyright Apache License 2.0
*/
static inline __m128i ssp_cvtepi16_epi32_SSE2 ( __m128i a)
{
static inline __m128i ssp_cvtepi16_epi32_SSE2 ( __m128i a) {
__m128i b = _mm_set1_epi32 (-1); //0xFFFFFFFF
__m128i c = _mm_unpacklo_epi16(a, b); //FFFFa0**FFFFa1**....
__m128i d = _mm_set1_epi32 (0x8000); //0x8000
b = _mm_andnot_si128(c, d); // 0x80 for positive, 0x00 for negative
d = _mm_slli_epi32(b, 1); // 0x100 for positive, 0x000 for negative
return _mm_add_epi32(c, d);
}
#endif // __SSE4_1__
......@@ -350,5 +336,46 @@ static inline __m128i ssp_cvtepi16_epi32_SSE2 ( __m128i a)
#endif // x86_64 || i386
#endif // SSE_INTRIN_H
#if defined(__x86_64__) || defined(__i386__)
#define vect128 __m128i
#elif defined(__arm__)
#define vect128 int16x8_t
#endif
static const short minusConjug128[8]__attribute__((aligned(16))) = {-1,1,-1,1,-1,1,-1,1};
static inline vect128 mulByConjugate128(vect128 *a, vect128 *b, int8_t output_shift) {
#if defined(__x86_64__) || defined(__i386__)
vect128 realPart = _mm_madd_epi16(*a,*b);
realPart = _mm_srai_epi32(realPart,output_shift);
vect128 imagPart = _mm_shufflelo_epi16(*b,_MM_SHUFFLE(2,3,0,1));
imagPart = _mm_shufflehi_epi16(imagPart,_MM_SHUFFLE(2,3,0,1));
imagPart = _mm_sign_epi16(imagPart,*(vect128 *)minusConjug128);
imagPart = _mm_madd_epi16(imagPart,*a);
imagPart = _mm_srai_epi32(imagPart,output_shift);
vect128 lowPart = _mm_unpacklo_epi32(realPart,imagPart);
vect128 highPart = _mm_unpackhi_epi32(realPart,imagPart);
return ( _mm_packs_epi32(lowPart,highPart));
#elif defined(__arm__)
AssertFatal(false, "not developped\n");
#endif
}
#if defined(__x86_64__) || defined(__i386__)
#define displaySamples128(vect) {\
__m128i x=vect; \
printf("vector: %s = (%hd,%hd) (%hd,%hd) (%hd,%hd) (%hd,%hd)\n", #vect, \
_mm_extract_epi16(x,0), \
_mm_extract_epi16(x,1),\
_mm_extract_epi16(x,2),\
_mm_extract_epi16(x,3),\
_mm_extract_epi16(x,4),\
_mm_extract_epi16(x,5),\
_mm_extract_epi16(x,6),\
_mm_extract_epi16(x,7));\
}
#elif defined(__arm__)
displaySamples128(vect) {}
//TBD
#endif
#endif // SSE_INTRIN_H
......@@ -36,4 +36,6 @@ nr_subframe_t nr_slot_select(nfapi_nr_config_request_t *cfg,unsigned char slot)
{
if (cfg->subframe_config.duplex_mode.value == FDD)
return(SF_DL);
LOG_E(PHY,"Not developped TDD mode\n");
return -1;
}
......@@ -1595,22 +1595,10 @@ void ue_ulsch_uespec_procedures(PHY_VARS_NR_UE *ue,UE_nr_rxtx_proc_t *proc,uint8
int harq_pid;
int frame_tx=proc->frame_tx;
int nr_tti_tx=proc->nr_tti_tx;
int Mod_id = ue->Mod_id;
int CC_id = ue->CC_id;
uint8_t Msg3_flag=0;
uint16_t first_rb, nb_rb;
unsigned int input_buffer_length;
int i;
int aa;
int tx_amp;
uint8_t ulsch_input_buffer[5477] __attribute__ ((aligned(32)));
uint8_t access_mode;
uint8_t Nbundled=0;
uint8_t NbundledCw1=0;