From 5b13d71e24aa19274366ee36f4221b97e576ebde Mon Sep 17 00:00:00 2001 From: laurent <laurent Thomas> Date: Tue, 14 Feb 2023 15:44:58 +0100 Subject: [PATCH] fix bugs introduced by previous commit --- .../nr_dl_channel_estimation.c | 7 +- openair1/PHY/NR_UE_TRANSPORT/csi_rx.c | 5 +- .../NR_UE_TRANSPORT/nr_dlsch_demodulation.c | 8 +- .../nr_dlsch_llr_computation.c | 7575 +---------------- .../NR_UE_TRANSPORT/nr_transport_proto_ue.h | 211 +- 5 files changed, 36 insertions(+), 7770 deletions(-) diff --git a/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c b/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c index 41cba313e76..814de26b886 100644 --- a/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c +++ b/openair1/PHY/NR_UE_ESTIMATION/nr_dl_channel_estimation.c @@ -1862,10 +1862,11 @@ void nr_pdsch_ptrs_processing(PHY_VARS_NR_UE *ue, #ifdef DEBUG_DL_PTRS printf("[PHY][DL][PTRS]: Rotate Symbol %2d with %d + j* %d\n", i, phase_per_symbol[i].r,phase_per_symbol[i].i); #endif - rotate_cpx_vector((c16_t*)&rxdataF_comp[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)], + rotate_cpx_vector((c16_t *)&rxdataF_comp[0][aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)], &phase_per_symbol[i], - (c16_t*)&rxdataF_comp[aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)], - ((*nb_rb) * NR_NB_SC_PER_RB), 15); + (c16_t *)&rxdataF_comp[0][aarx][(i * (*nb_rb) * NR_NB_SC_PER_RB)], + ((*nb_rb) * NR_NB_SC_PER_RB), + 15); }// if not DMRS Symbol }// symbol loop }// last symbol check diff --git a/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c b/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c index 08aa58d5f98..4eaa36f1bc4 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c +++ b/openair1/PHY/NR_UE_TRANSPORT/csi_rx.c @@ -518,9 +518,8 @@ int nr_csi_rs_ri_estimation(const PHY_VARS_NR_UE *ue, // construct Hh x H elements if(ant_rx_conjch == ant_rx_ch) { - nr_a_sum_b((__m128i *)&csi_rs_estimated_A_MF[port_tx_conjch][port_tx_ch][k_offset], - (__m128i *)&csi_rs_estimated_conjch_ch[ant_rx_conjch][port_tx_conjch][ant_rx_ch][port_tx_ch][k_offset], - 1); + nr_a_sum_b( + (c16_t *)&csi_rs_estimated_A_MF[port_tx_conjch][port_tx_ch][k_offset], (c16_t *)&csi_rs_estimated_conjch_ch[ant_rx_conjch][port_tx_conjch][ant_rx_ch][port_tx_ch][k_offset], 1); } } } diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c index 5a0136e4b6e..d5681721536 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c +++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_demodulation.c @@ -1876,12 +1876,7 @@ uint8_t nr_matrix_inverse(int32_t size, if(flag) {//fixed point SIMD calc. //Allocate the submatrix elements - c16_t sub_matrix_data[size - 1][size - 1][12 * nb_rb]; - memset(sub_matrix_data, 0, sizeof(sub_matrix_data)); c16_t *sub_matrix[size - 1][size - 1]; - for (int rtx = 0; rtx < (size - 1); rtx++) - for (int ctx = 0; ctx < (size - 1); ctx++) - sub_matrix[ctx][rtx] = sub_matrix_data[ctx][rtx]; //Compute Matrix determinant nr_determin(size, @@ -1913,7 +1908,8 @@ uint8_t nr_matrix_inverse(int32_t size, //fill out the sub matrix corresponds to this element for (int ridx=0;ridx<(size-1);ridx++) for (int cidx=0;cidx<(size-1);cidx++) - memcpy(sub_matrix[cidx][ridx], a44[cc[cidx]][rr[ridx]], sizeof(sub_matrix_data[cidx][ridx])); + // To verify + sub_matrix[cidx][ridx]=a44[cc[cidx]][rr[ridx]]; nr_determin(size - 1, // size sub_matrix, diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_llr_computation.c b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_llr_computation.c index 17dd046d322..1058e23a6aa 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_llr_computation.c +++ b/openair1/PHY/NR_UE_TRANSPORT/nr_dlsch_llr_computation.c @@ -43,581 +43,7 @@ int16_t nr_zeros[8] __attribute__ ((aligned(16))) = {0,0,0,0,0,0,0,0}; int16_t nr_ones[8] __attribute__ ((aligned(16))) = {0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff}; #if defined(__x86_64__) || defined(__i386__) __m128i rho_rpi __attribute__ ((aligned(16))); -__m128i rho_rmi __attribute__ ((aligned(16))); -__m128i rho_rpi_1_1 __attribute__ ((aligned(16))); -__m128i rho_rpi_1_3 __attribute__ ((aligned(16))); -__m128i rho_rpi_1_5 __attribute__ ((aligned(16))); -__m128i rho_rpi_1_7 __attribute__ ((aligned(16))); -__m128i rho_rpi_3_1 __attribute__ ((aligned(16))); -__m128i rho_rpi_3_3 __attribute__ ((aligned(16))); -__m128i rho_rpi_3_5 __attribute__ ((aligned(16))); -__m128i rho_rpi_3_7 __attribute__ ((aligned(16))); -__m128i rho_rpi_5_1 __attribute__ ((aligned(16))); -__m128i rho_rpi_5_3 __attribute__ ((aligned(16))); -__m128i rho_rpi_5_5 __attribute__ ((aligned(16))); -__m128i rho_rpi_5_7 __attribute__ ((aligned(16))); -__m128i rho_rpi_7_1 __attribute__ ((aligned(16))); -__m128i rho_rpi_7_3 __attribute__ ((aligned(16))); -__m128i rho_rpi_7_5 __attribute__ ((aligned(16))); -__m128i rho_rpi_7_7 __attribute__ ((aligned(16))); -__m128i rho_rmi_1_1 __attribute__ ((aligned(16))); -__m128i rho_rmi_1_3 __attribute__ ((aligned(16))); -__m128i rho_rmi_1_5 __attribute__ ((aligned(16))); -__m128i rho_rmi_1_7 __attribute__ ((aligned(16))); -__m128i rho_rmi_3_1 __attribute__ ((aligned(16))); -__m128i rho_rmi_3_3 __attribute__ ((aligned(16))); -__m128i rho_rmi_3_5 __attribute__ ((aligned(16))); -__m128i rho_rmi_3_7 __attribute__ ((aligned(16))); -__m128i rho_rmi_5_1 __attribute__ ((aligned(16))); -__m128i rho_rmi_5_3 __attribute__ ((aligned(16))); -__m128i rho_rmi_5_5 __attribute__ ((aligned(16))); -__m128i rho_rmi_5_7 __attribute__ ((aligned(16))); -__m128i rho_rmi_7_1 __attribute__ ((aligned(16))); -__m128i rho_rmi_7_3 __attribute__ ((aligned(16))); -__m128i rho_rmi_7_5 __attribute__ ((aligned(16))); -__m128i rho_rmi_7_7 __attribute__ ((aligned(16))); - -__m128i psi_r_m7_m7 __attribute__ ((aligned(16))); -__m128i psi_r_m7_m5 __attribute__ ((aligned(16))); -__m128i psi_r_m7_m3 __attribute__ ((aligned(16))); -__m128i psi_r_m7_m1 __attribute__ ((aligned(16))); -__m128i psi_r_m7_p1 __attribute__ ((aligned(16))); -__m128i psi_r_m7_p3 __attribute__ ((aligned(16))); -__m128i psi_r_m7_p5 __attribute__ ((aligned(16))); -__m128i psi_r_m7_p7 __attribute__ ((aligned(16))); -__m128i psi_r_m5_m7 __attribute__ ((aligned(16))); -__m128i psi_r_m5_m5 __attribute__ ((aligned(16))); -__m128i psi_r_m5_m3 __attribute__ ((aligned(16))); -__m128i psi_r_m5_m1 __attribute__ ((aligned(16))); -__m128i psi_r_m5_p1 __attribute__ ((aligned(16))); -__m128i psi_r_m5_p3 __attribute__ ((aligned(16))); -__m128i psi_r_m5_p5 __attribute__ ((aligned(16))); -__m128i psi_r_m5_p7 __attribute__ ((aligned(16))); -__m128i psi_r_m3_m7 __attribute__ ((aligned(16))); -__m128i psi_r_m3_m5 __attribute__ ((aligned(16))); -__m128i psi_r_m3_m3 __attribute__ ((aligned(16))); -__m128i psi_r_m3_m1 __attribute__ ((aligned(16))); -__m128i psi_r_m3_p1 __attribute__ ((aligned(16))); -__m128i psi_r_m3_p3 __attribute__ ((aligned(16))); -__m128i psi_r_m3_p5 __attribute__ ((aligned(16))); -__m128i psi_r_m3_p7 __attribute__ ((aligned(16))); -__m128i psi_r_m1_m7 __attribute__ ((aligned(16))); -__m128i psi_r_m1_m5 __attribute__ ((aligned(16))); -__m128i psi_r_m1_m3 __attribute__ ((aligned(16))); -__m128i psi_r_m1_m1 __attribute__ ((aligned(16))); -__m128i psi_r_m1_p1 __attribute__ ((aligned(16))); -__m128i psi_r_m1_p3 __attribute__ ((aligned(16))); -__m128i psi_r_m1_p5 __attribute__ ((aligned(16))); -__m128i psi_r_m1_p7 __attribute__ ((aligned(16))); -__m128i psi_r_p1_m7 __attribute__ ((aligned(16))); -__m128i psi_r_p1_m5 __attribute__ ((aligned(16))); -__m128i psi_r_p1_m3 __attribute__ ((aligned(16))); -__m128i psi_r_p1_m1 __attribute__ ((aligned(16))); -__m128i psi_r_p1_p1 __attribute__ ((aligned(16))); -__m128i psi_r_p1_p3 __attribute__ ((aligned(16))); -__m128i psi_r_p1_p5 __attribute__ ((aligned(16))); -__m128i psi_r_p1_p7 __attribute__ ((aligned(16))); -__m128i psi_r_p3_m7 __attribute__ ((aligned(16))); -__m128i psi_r_p3_m5 __attribute__ ((aligned(16))); -__m128i psi_r_p3_m3 __attribute__ ((aligned(16))); -__m128i psi_r_p3_m1 __attribute__ ((aligned(16))); -__m128i psi_r_p3_p1 __attribute__ ((aligned(16))); -__m128i psi_r_p3_p3 __attribute__ ((aligned(16))); -__m128i psi_r_p3_p5 __attribute__ ((aligned(16))); -__m128i psi_r_p3_p7 __attribute__ ((aligned(16))); -__m128i psi_r_p5_m7 __attribute__ ((aligned(16))); -__m128i psi_r_p5_m5 __attribute__ ((aligned(16))); -__m128i psi_r_p5_m3 __attribute__ ((aligned(16))); -__m128i psi_r_p5_m1 __attribute__ ((aligned(16))); -__m128i psi_r_p5_p1 __attribute__ ((aligned(16))); -__m128i psi_r_p5_p3 __attribute__ ((aligned(16))); -__m128i psi_r_p5_p5 __attribute__ ((aligned(16))); -__m128i psi_r_p5_p7 __attribute__ ((aligned(16))); -__m128i psi_r_p7_m7 __attribute__ ((aligned(16))); -__m128i psi_r_p7_m5 __attribute__ ((aligned(16))); -__m128i psi_r_p7_m3 __attribute__ ((aligned(16))); -__m128i psi_r_p7_m1 __attribute__ ((aligned(16))); -__m128i psi_r_p7_p1 __attribute__ ((aligned(16))); -__m128i psi_r_p7_p3 __attribute__ ((aligned(16))); -__m128i psi_r_p7_p5 __attribute__ ((aligned(16))); -__m128i psi_r_p7_p7 __attribute__ ((aligned(16))); - -__m128i psi_i_m7_m7 __attribute__ ((aligned(16))); -__m128i psi_i_m7_m5 __attribute__ ((aligned(16))); -__m128i psi_i_m7_m3 __attribute__ ((aligned(16))); -__m128i psi_i_m7_m1 __attribute__ ((aligned(16))); -__m128i psi_i_m7_p1 __attribute__ ((aligned(16))); -__m128i psi_i_m7_p3 __attribute__ ((aligned(16))); -__m128i psi_i_m7_p5 __attribute__ ((aligned(16))); -__m128i psi_i_m7_p7 __attribute__ ((aligned(16))); -__m128i psi_i_m5_m7 __attribute__ ((aligned(16))); -__m128i psi_i_m5_m5 __attribute__ ((aligned(16))); -__m128i psi_i_m5_m3 __attribute__ ((aligned(16))); -__m128i psi_i_m5_m1 __attribute__ ((aligned(16))); -__m128i psi_i_m5_p1 __attribute__ ((aligned(16))); -__m128i psi_i_m5_p3 __attribute__ ((aligned(16))); -__m128i psi_i_m5_p5 __attribute__ ((aligned(16))); -__m128i psi_i_m5_p7 __attribute__ ((aligned(16))); -__m128i psi_i_m3_m7 __attribute__ ((aligned(16))); -__m128i psi_i_m3_m5 __attribute__ ((aligned(16))); -__m128i psi_i_m3_m3 __attribute__ ((aligned(16))); -__m128i psi_i_m3_m1 __attribute__ ((aligned(16))); -__m128i psi_i_m3_p1 __attribute__ ((aligned(16))); -__m128i psi_i_m3_p3 __attribute__ ((aligned(16))); -__m128i psi_i_m3_p5 __attribute__ ((aligned(16))); -__m128i psi_i_m3_p7 __attribute__ ((aligned(16))); -__m128i psi_i_m1_m7 __attribute__ ((aligned(16))); -__m128i psi_i_m1_m5 __attribute__ ((aligned(16))); -__m128i psi_i_m1_m3 __attribute__ ((aligned(16))); -__m128i psi_i_m1_m1 __attribute__ ((aligned(16))); -__m128i psi_i_m1_p1 __attribute__ ((aligned(16))); -__m128i psi_i_m1_p3 __attribute__ ((aligned(16))); -__m128i psi_i_m1_p5 __attribute__ ((aligned(16))); -__m128i psi_i_m1_p7 __attribute__ ((aligned(16))); -__m128i psi_i_p1_m7 __attribute__ ((aligned(16))); -__m128i psi_i_p1_m5 __attribute__ ((aligned(16))); -__m128i psi_i_p1_m3 __attribute__ ((aligned(16))); -__m128i psi_i_p1_m1 __attribute__ ((aligned(16))); -__m128i psi_i_p1_p1 __attribute__ ((aligned(16))); -__m128i psi_i_p1_p3 __attribute__ ((aligned(16))); -__m128i psi_i_p1_p5 __attribute__ ((aligned(16))); -__m128i psi_i_p1_p7 __attribute__ ((aligned(16))); -__m128i psi_i_p3_m7 __attribute__ ((aligned(16))); -__m128i psi_i_p3_m5 __attribute__ ((aligned(16))); -__m128i psi_i_p3_m3 __attribute__ ((aligned(16))); -__m128i psi_i_p3_m1 __attribute__ ((aligned(16))); -__m128i psi_i_p3_p1 __attribute__ ((aligned(16))); -__m128i psi_i_p3_p3 __attribute__ ((aligned(16))); -__m128i psi_i_p3_p5 __attribute__ ((aligned(16))); -__m128i psi_i_p3_p7 __attribute__ ((aligned(16))); -__m128i psi_i_p5_m7 __attribute__ ((aligned(16))); -__m128i psi_i_p5_m5 __attribute__ ((aligned(16))); -__m128i psi_i_p5_m3 __attribute__ ((aligned(16))); -__m128i psi_i_p5_m1 __attribute__ ((aligned(16))); -__m128i psi_i_p5_p1 __attribute__ ((aligned(16))); -__m128i psi_i_p5_p3 __attribute__ ((aligned(16))); -__m128i psi_i_p5_p5 __attribute__ ((aligned(16))); -__m128i psi_i_p5_p7 __attribute__ ((aligned(16))); -__m128i psi_i_p7_m7 __attribute__ ((aligned(16))); -__m128i psi_i_p7_m5 __attribute__ ((aligned(16))); -__m128i psi_i_p7_m3 __attribute__ ((aligned(16))); -__m128i psi_i_p7_m1 __attribute__ ((aligned(16))); -__m128i psi_i_p7_p1 __attribute__ ((aligned(16))); -__m128i psi_i_p7_p3 __attribute__ ((aligned(16))); -__m128i psi_i_p7_p5 __attribute__ ((aligned(16))); -__m128i psi_i_p7_p7 __attribute__ ((aligned(16))); - -__m128i a_r_m7_m7 __attribute__ ((aligned(16))); -__m128i a_r_m7_m5 __attribute__ ((aligned(16))); -__m128i a_r_m7_m3 __attribute__ ((aligned(16))); -__m128i a_r_m7_m1 __attribute__ ((aligned(16))); -__m128i a_r_m7_p1 __attribute__ ((aligned(16))); -__m128i a_r_m7_p3 __attribute__ ((aligned(16))); -__m128i a_r_m7_p5 __attribute__ ((aligned(16))); -__m128i a_r_m7_p7 __attribute__ ((aligned(16))); -__m128i a_r_m5_m7 __attribute__ ((aligned(16))); -__m128i a_r_m5_m5 __attribute__ ((aligned(16))); -__m128i a_r_m5_m3 __attribute__ ((aligned(16))); -__m128i a_r_m5_m1 __attribute__ ((aligned(16))); -__m128i a_r_m5_p1 __attribute__ ((aligned(16))); -__m128i a_r_m5_p3 __attribute__ ((aligned(16))); -__m128i a_r_m5_p5 __attribute__ ((aligned(16))); -__m128i a_r_m5_p7 __attribute__ ((aligned(16))); -__m128i a_r_m3_m7 __attribute__ ((aligned(16))); -__m128i a_r_m3_m5 __attribute__ ((aligned(16))); -__m128i a_r_m3_m3 __attribute__ ((aligned(16))); -__m128i a_r_m3_m1 __attribute__ ((aligned(16))); -__m128i a_r_m3_p1 __attribute__ ((aligned(16))); -__m128i a_r_m3_p3 __attribute__ ((aligned(16))); -__m128i a_r_m3_p5 __attribute__ ((aligned(16))); -__m128i a_r_m3_p7 __attribute__ ((aligned(16))); -__m128i a_r_m1_m7 __attribute__ ((aligned(16))); -__m128i a_r_m1_m5 __attribute__ ((aligned(16))); -__m128i a_r_m1_m3 __attribute__ ((aligned(16))); -__m128i a_r_m1_m1 __attribute__ ((aligned(16))); -__m128i a_r_m1_p1 __attribute__ ((aligned(16))); -__m128i a_r_m1_p3 __attribute__ ((aligned(16))); -__m128i a_r_m1_p5 __attribute__ ((aligned(16))); -__m128i a_r_m1_p7 __attribute__ ((aligned(16))); -__m128i a_r_p1_m7 __attribute__ ((aligned(16))); -__m128i a_r_p1_m5 __attribute__ ((aligned(16))); -__m128i a_r_p1_m3 __attribute__ ((aligned(16))); -__m128i a_r_p1_m1 __attribute__ ((aligned(16))); -__m128i a_r_p1_p1 __attribute__ ((aligned(16))); -__m128i a_r_p1_p3 __attribute__ ((aligned(16))); -__m128i a_r_p1_p5 __attribute__ ((aligned(16))); -__m128i a_r_p1_p7 __attribute__ ((aligned(16))); -__m128i a_r_p3_m7 __attribute__ ((aligned(16))); -__m128i a_r_p3_m5 __attribute__ ((aligned(16))); -__m128i a_r_p3_m3 __attribute__ ((aligned(16))); -__m128i a_r_p3_m1 __attribute__ ((aligned(16))); -__m128i a_r_p3_p1 __attribute__ ((aligned(16))); -__m128i a_r_p3_p3 __attribute__ ((aligned(16))); -__m128i a_r_p3_p5 __attribute__ ((aligned(16))); -__m128i a_r_p3_p7 __attribute__ ((aligned(16))); -__m128i a_r_p5_m7 __attribute__ ((aligned(16))); -__m128i a_r_p5_m5 __attribute__ ((aligned(16))); -__m128i a_r_p5_m3 __attribute__ ((aligned(16))); -__m128i a_r_p5_m1 __attribute__ ((aligned(16))); -__m128i a_r_p5_p1 __attribute__ ((aligned(16))); -__m128i a_r_p5_p3 __attribute__ ((aligned(16))); -__m128i a_r_p5_p5 __attribute__ ((aligned(16))); -__m128i a_r_p5_p7 __attribute__ ((aligned(16))); -__m128i a_r_p7_m7 __attribute__ ((aligned(16))); -__m128i a_r_p7_m5 __attribute__ ((aligned(16))); -__m128i a_r_p7_m3 __attribute__ ((aligned(16))); -__m128i a_r_p7_m1 __attribute__ ((aligned(16))); -__m128i a_r_p7_p1 __attribute__ ((aligned(16))); -__m128i a_r_p7_p3 __attribute__ ((aligned(16))); -__m128i a_r_p7_p5 __attribute__ ((aligned(16))); -__m128i a_r_p7_p7 __attribute__ ((aligned(16))); - -__m128i a_i_m7_m7 __attribute__ ((aligned(16))); -__m128i a_i_m7_m5 __attribute__ ((aligned(16))); -__m128i a_i_m7_m3 __attribute__ ((aligned(16))); -__m128i a_i_m7_m1 __attribute__ ((aligned(16))); -__m128i a_i_m7_p1 __attribute__ ((aligned(16))); -__m128i a_i_m7_p3 __attribute__ ((aligned(16))); -__m128i a_i_m7_p5 __attribute__ ((aligned(16))); -__m128i a_i_m7_p7 __attribute__ ((aligned(16))); -__m128i a_i_m5_m7 __attribute__ ((aligned(16))); -__m128i a_i_m5_m5 __attribute__ ((aligned(16))); -__m128i a_i_m5_m3 __attribute__ ((aligned(16))); -__m128i a_i_m5_m1 __attribute__ ((aligned(16))); -__m128i a_i_m5_p1 __attribute__ ((aligned(16))); -__m128i a_i_m5_p3 __attribute__ ((aligned(16))); -__m128i a_i_m5_p5 __attribute__ ((aligned(16))); -__m128i a_i_m5_p7 __attribute__ ((aligned(16))); -__m128i a_i_m3_m7 __attribute__ ((aligned(16))); -__m128i a_i_m3_m5 __attribute__ ((aligned(16))); -__m128i a_i_m3_m3 __attribute__ ((aligned(16))); -__m128i a_i_m3_m1 __attribute__ ((aligned(16))); -__m128i a_i_m3_p1 __attribute__ ((aligned(16))); -__m128i a_i_m3_p3 __attribute__ ((aligned(16))); -__m128i a_i_m3_p5 __attribute__ ((aligned(16))); -__m128i a_i_m3_p7 __attribute__ ((aligned(16))); -__m128i a_i_m1_m7 __attribute__ ((aligned(16))); -__m128i a_i_m1_m5 __attribute__ ((aligned(16))); -__m128i a_i_m1_m3 __attribute__ ((aligned(16))); -__m128i a_i_m1_m1 __attribute__ ((aligned(16))); -__m128i a_i_m1_p1 __attribute__ ((aligned(16))); -__m128i a_i_m1_p3 __attribute__ ((aligned(16))); -__m128i a_i_m1_p5 __attribute__ ((aligned(16))); -__m128i a_i_m1_p7 __attribute__ ((aligned(16))); -__m128i a_i_p1_m7 __attribute__ ((aligned(16))); -__m128i a_i_p1_m5 __attribute__ ((aligned(16))); -__m128i a_i_p1_m3 __attribute__ ((aligned(16))); -__m128i a_i_p1_m1 __attribute__ ((aligned(16))); -__m128i a_i_p1_p1 __attribute__ ((aligned(16))); -__m128i a_i_p1_p3 __attribute__ ((aligned(16))); -__m128i a_i_p1_p5 __attribute__ ((aligned(16))); -__m128i a_i_p1_p7 __attribute__ ((aligned(16))); -__m128i a_i_p3_m7 __attribute__ ((aligned(16))); -__m128i a_i_p3_m5 __attribute__ ((aligned(16))); -__m128i a_i_p3_m3 __attribute__ ((aligned(16))); -__m128i a_i_p3_m1 __attribute__ ((aligned(16))); -__m128i a_i_p3_p1 __attribute__ ((aligned(16))); -__m128i a_i_p3_p3 __attribute__ ((aligned(16))); -__m128i a_i_p3_p5 __attribute__ ((aligned(16))); -__m128i a_i_p3_p7 __attribute__ ((aligned(16))); -__m128i a_i_p5_m7 __attribute__ ((aligned(16))); -__m128i a_i_p5_m5 __attribute__ ((aligned(16))); -__m128i a_i_p5_m3 __attribute__ ((aligned(16))); -__m128i a_i_p5_m1 __attribute__ ((aligned(16))); -__m128i a_i_p5_p1 __attribute__ ((aligned(16))); -__m128i a_i_p5_p3 __attribute__ ((aligned(16))); -__m128i a_i_p5_p5 __attribute__ ((aligned(16))); -__m128i a_i_p5_p7 __attribute__ ((aligned(16))); -__m128i a_i_p7_m7 __attribute__ ((aligned(16))); -__m128i a_i_p7_m5 __attribute__ ((aligned(16))); -__m128i a_i_p7_m3 __attribute__ ((aligned(16))); -__m128i a_i_p7_m1 __attribute__ ((aligned(16))); -__m128i a_i_p7_p1 __attribute__ ((aligned(16))); -__m128i a_i_p7_p3 __attribute__ ((aligned(16))); -__m128i a_i_p7_p5 __attribute__ ((aligned(16))); -__m128i a_i_p7_p7 __attribute__ ((aligned(16))); - -__m128i psi_a_m7_m7 __attribute__ ((aligned(16))); -__m128i psi_a_m7_m5 __attribute__ ((aligned(16))); -__m128i psi_a_m7_m3 __attribute__ ((aligned(16))); -__m128i psi_a_m7_m1 __attribute__ ((aligned(16))); -__m128i psi_a_m7_p1 __attribute__ ((aligned(16))); -__m128i psi_a_m7_p3 __attribute__ ((aligned(16))); -__m128i psi_a_m7_p5 __attribute__ ((aligned(16))); -__m128i psi_a_m7_p7 __attribute__ ((aligned(16))); -__m128i psi_a_m5_m7 __attribute__ ((aligned(16))); -__m128i psi_a_m5_m5 __attribute__ ((aligned(16))); -__m128i psi_a_m5_m3 __attribute__ ((aligned(16))); -__m128i psi_a_m5_m1 __attribute__ ((aligned(16))); -__m128i psi_a_m5_p1 __attribute__ ((aligned(16))); -__m128i psi_a_m5_p3 __attribute__ ((aligned(16))); -__m128i psi_a_m5_p5 __attribute__ ((aligned(16))); -__m128i psi_a_m5_p7 __attribute__ ((aligned(16))); -__m128i psi_a_m3_m7 __attribute__ ((aligned(16))); -__m128i psi_a_m3_m5 __attribute__ ((aligned(16))); -__m128i psi_a_m3_m3 __attribute__ ((aligned(16))); -__m128i psi_a_m3_m1 __attribute__ ((aligned(16))); -__m128i psi_a_m3_p1 __attribute__ ((aligned(16))); -__m128i psi_a_m3_p3 __attribute__ ((aligned(16))); -__m128i psi_a_m3_p5 __attribute__ ((aligned(16))); -__m128i psi_a_m3_p7 __attribute__ ((aligned(16))); -__m128i psi_a_m1_m7 __attribute__ ((aligned(16))); -__m128i psi_a_m1_m5 __attribute__ ((aligned(16))); -__m128i psi_a_m1_m3 __attribute__ ((aligned(16))); -__m128i psi_a_m1_m1 __attribute__ ((aligned(16))); -__m128i psi_a_m1_p1 __attribute__ ((aligned(16))); -__m128i psi_a_m1_p3 __attribute__ ((aligned(16))); -__m128i psi_a_m1_p5 __attribute__ ((aligned(16))); -__m128i psi_a_m1_p7 __attribute__ ((aligned(16))); -__m128i psi_a_p1_m7 __attribute__ ((aligned(16))); -__m128i psi_a_p1_m5 __attribute__ ((aligned(16))); -__m128i psi_a_p1_m3 __attribute__ ((aligned(16))); -__m128i psi_a_p1_m1 __attribute__ ((aligned(16))); -__m128i psi_a_p1_p1 __attribute__ ((aligned(16))); -__m128i psi_a_p1_p3 __attribute__ ((aligned(16))); -__m128i psi_a_p1_p5 __attribute__ ((aligned(16))); -__m128i psi_a_p1_p7 __attribute__ ((aligned(16))); -__m128i psi_a_p3_m7 __attribute__ ((aligned(16))); -__m128i psi_a_p3_m5 __attribute__ ((aligned(16))); -__m128i psi_a_p3_m3 __attribute__ ((aligned(16))); -__m128i psi_a_p3_m1 __attribute__ ((aligned(16))); -__m128i psi_a_p3_p1 __attribute__ ((aligned(16))); -__m128i psi_a_p3_p3 __attribute__ ((aligned(16))); -__m128i psi_a_p3_p5 __attribute__ ((aligned(16))); -__m128i psi_a_p3_p7 __attribute__ ((aligned(16))); -__m128i psi_a_p5_m7 __attribute__ ((aligned(16))); -__m128i psi_a_p5_m5 __attribute__ ((aligned(16))); -__m128i psi_a_p5_m3 __attribute__ ((aligned(16))); -__m128i psi_a_p5_m1 __attribute__ ((aligned(16))); -__m128i psi_a_p5_p1 __attribute__ ((aligned(16))); -__m128i psi_a_p5_p3 __attribute__ ((aligned(16))); -__m128i psi_a_p5_p5 __attribute__ ((aligned(16))); -__m128i psi_a_p5_p7 __attribute__ ((aligned(16))); -__m128i psi_a_p7_m7 __attribute__ ((aligned(16))); -__m128i psi_a_p7_m5 __attribute__ ((aligned(16))); -__m128i psi_a_p7_m3 __attribute__ ((aligned(16))); -__m128i psi_a_p7_m1 __attribute__ ((aligned(16))); -__m128i psi_a_p7_p1 __attribute__ ((aligned(16))); -__m128i psi_a_p7_p3 __attribute__ ((aligned(16))); -__m128i psi_a_p7_p5 __attribute__ ((aligned(16))); -__m128i psi_a_p7_p7 __attribute__ ((aligned(16))); - -__m128i a_sq_m7_m7 __attribute__ ((aligned(16))); -__m128i a_sq_m7_m5 __attribute__ ((aligned(16))); -__m128i a_sq_m7_m3 __attribute__ ((aligned(16))); -__m128i a_sq_m7_m1 __attribute__ ((aligned(16))); -__m128i a_sq_m7_p1 __attribute__ ((aligned(16))); -__m128i a_sq_m7_p3 __attribute__ ((aligned(16))); -__m128i a_sq_m7_p5 __attribute__ ((aligned(16))); -__m128i a_sq_m7_p7 __attribute__ ((aligned(16))); -__m128i a_sq_m5_m7 __attribute__ ((aligned(16))); -__m128i a_sq_m5_m5 __attribute__ ((aligned(16))); -__m128i a_sq_m5_m3 __attribute__ ((aligned(16))); -__m128i a_sq_m5_m1 __attribute__ ((aligned(16))); -__m128i a_sq_m5_p1 __attribute__ ((aligned(16))); -__m128i a_sq_m5_p3 __attribute__ ((aligned(16))); -__m128i a_sq_m5_p5 __attribute__ ((aligned(16))); -__m128i a_sq_m5_p7 __attribute__ ((aligned(16))); -__m128i a_sq_m3_m7 __attribute__ ((aligned(16))); -__m128i a_sq_m3_m5 __attribute__ ((aligned(16))); -__m128i a_sq_m3_m3 __attribute__ ((aligned(16))); -__m128i a_sq_m3_m1 __attribute__ ((aligned(16))); -__m128i a_sq_m3_p1 __attribute__ ((aligned(16))); -__m128i a_sq_m3_p3 __attribute__ ((aligned(16))); -__m128i a_sq_m3_p5 __attribute__ ((aligned(16))); -__m128i a_sq_m3_p7 __attribute__ ((aligned(16))); -__m128i a_sq_m1_m7 __attribute__ ((aligned(16))); -__m128i a_sq_m1_m5 __attribute__ ((aligned(16))); -__m128i a_sq_m1_m3 __attribute__ ((aligned(16))); -__m128i a_sq_m1_m1 __attribute__ ((aligned(16))); -__m128i a_sq_m1_p1 __attribute__ ((aligned(16))); -__m128i a_sq_m1_p3 __attribute__ ((aligned(16))); -__m128i a_sq_m1_p5 __attribute__ ((aligned(16))); -__m128i a_sq_m1_p7 __attribute__ ((aligned(16))); -__m128i a_sq_p1_m7 __attribute__ ((aligned(16))); -__m128i a_sq_p1_m5 __attribute__ ((aligned(16))); -__m128i a_sq_p1_m3 __attribute__ ((aligned(16))); -__m128i a_sq_p1_m1 __attribute__ ((aligned(16))); -__m128i a_sq_p1_p1 __attribute__ ((aligned(16))); -__m128i a_sq_p1_p3 __attribute__ ((aligned(16))); -__m128i a_sq_p1_p5 __attribute__ ((aligned(16))); -__m128i a_sq_p1_p7 __attribute__ ((aligned(16))); -__m128i a_sq_p3_m7 __attribute__ ((aligned(16))); -__m128i a_sq_p3_m5 __attribute__ ((aligned(16))); -__m128i a_sq_p3_m3 __attribute__ ((aligned(16))); -__m128i a_sq_p3_m1 __attribute__ ((aligned(16))); -__m128i a_sq_p3_p1 __attribute__ ((aligned(16))); -__m128i a_sq_p3_p3 __attribute__ ((aligned(16))); -__m128i a_sq_p3_p5 __attribute__ ((aligned(16))); -__m128i a_sq_p3_p7 __attribute__ ((aligned(16))); -__m128i a_sq_p5_m7 __attribute__ ((aligned(16))); -__m128i a_sq_p5_m5 __attribute__ ((aligned(16))); -__m128i a_sq_p5_m3 __attribute__ ((aligned(16))); -__m128i a_sq_p5_m1 __attribute__ ((aligned(16))); -__m128i a_sq_p5_p1 __attribute__ ((aligned(16))); -__m128i a_sq_p5_p3 __attribute__ ((aligned(16))); -__m128i a_sq_p5_p5 __attribute__ ((aligned(16))); -__m128i a_sq_p5_p7 __attribute__ ((aligned(16))); -__m128i a_sq_p7_m7 __attribute__ ((aligned(16))); -__m128i a_sq_p7_m5 __attribute__ ((aligned(16))); -__m128i a_sq_p7_m3 __attribute__ ((aligned(16))); -__m128i a_sq_p7_m1 __attribute__ ((aligned(16))); -__m128i a_sq_p7_p1 __attribute__ ((aligned(16))); -__m128i a_sq_p7_p3 __attribute__ ((aligned(16))); -__m128i a_sq_p7_p5 __attribute__ ((aligned(16))); -__m128i a_sq_p7_p7 __attribute__ ((aligned(16))); - -__m128i bit_met_m7_m7 __attribute__ ((aligned(16))); -__m128i bit_met_m7_m5 __attribute__ ((aligned(16))); -__m128i bit_met_m7_m3 __attribute__ ((aligned(16))); -__m128i bit_met_m7_m1 __attribute__ ((aligned(16))); -__m128i bit_met_m7_p1 __attribute__ ((aligned(16))); -__m128i bit_met_m7_p3 __attribute__ ((aligned(16))); -__m128i bit_met_m7_p5 __attribute__ ((aligned(16))); -__m128i bit_met_m7_p7 __attribute__ ((aligned(16))); -__m128i bit_met_m5_m7 __attribute__ ((aligned(16))); -__m128i bit_met_m5_m5 __attribute__ ((aligned(16))); -__m128i bit_met_m5_m3 __attribute__ ((aligned(16))); -__m128i bit_met_m5_m1 __attribute__ ((aligned(16))); -__m128i bit_met_m5_p1 __attribute__ ((aligned(16))); -__m128i bit_met_m5_p3 __attribute__ ((aligned(16))); -__m128i bit_met_m5_p5 __attribute__ ((aligned(16))); -__m128i bit_met_m5_p7 __attribute__ ((aligned(16))); -__m128i bit_met_m3_m7 __attribute__ ((aligned(16))); -__m128i bit_met_m3_m5 __attribute__ ((aligned(16))); -__m128i bit_met_m3_m3 __attribute__ ((aligned(16))); -__m128i bit_met_m3_m1 __attribute__ ((aligned(16))); -__m128i bit_met_m3_p1 __attribute__ ((aligned(16))); -__m128i bit_met_m3_p3 __attribute__ ((aligned(16))); -__m128i bit_met_m3_p5 __attribute__ ((aligned(16))); -__m128i bit_met_m3_p7 __attribute__ ((aligned(16))); -__m128i bit_met_m1_m7 __attribute__ ((aligned(16))); -__m128i bit_met_m1_m5 __attribute__ ((aligned(16))); -__m128i bit_met_m1_m3 __attribute__ ((aligned(16))); -__m128i bit_met_m1_m1 __attribute__ ((aligned(16))); -__m128i bit_met_m1_p1 __attribute__ ((aligned(16))); -__m128i bit_met_m1_p3 __attribute__ ((aligned(16))); -__m128i bit_met_m1_p5 __attribute__ ((aligned(16))); -__m128i bit_met_m1_p7 __attribute__ ((aligned(16))); -__m128i bit_met_p1_m7 __attribute__ ((aligned(16))); -__m128i bit_met_p1_m5 __attribute__ ((aligned(16))); -__m128i bit_met_p1_m3 __attribute__ ((aligned(16))); -__m128i bit_met_p1_m1 __attribute__ ((aligned(16))); -__m128i bit_met_p1_p1 __attribute__ ((aligned(16))); -__m128i bit_met_p1_p3 __attribute__ ((aligned(16))); -__m128i bit_met_p1_p5 __attribute__ ((aligned(16))); -__m128i bit_met_p1_p7 __attribute__ ((aligned(16))); -__m128i bit_met_p3_m7 __attribute__ ((aligned(16))); -__m128i bit_met_p3_m5 __attribute__ ((aligned(16))); -__m128i bit_met_p3_m3 __attribute__ ((aligned(16))); -__m128i bit_met_p3_m1 __attribute__ ((aligned(16))); -__m128i bit_met_p3_p1 __attribute__ ((aligned(16))); -__m128i bit_met_p3_p3 __attribute__ ((aligned(16))); -__m128i bit_met_p3_p5 __attribute__ ((aligned(16))); -__m128i bit_met_p3_p7 __attribute__ ((aligned(16))); -__m128i bit_met_p5_m7 __attribute__ ((aligned(16))); -__m128i bit_met_p5_m5 __attribute__ ((aligned(16))); -__m128i bit_met_p5_m3 __attribute__ ((aligned(16))); -__m128i bit_met_p5_m1 __attribute__ ((aligned(16))); -__m128i bit_met_p5_p1 __attribute__ ((aligned(16))); -__m128i bit_met_p5_p3 __attribute__ ((aligned(16))); -__m128i bit_met_p5_p5 __attribute__ ((aligned(16))); -__m128i bit_met_p5_p7 __attribute__ ((aligned(16))); -__m128i bit_met_p7_m7 __attribute__ ((aligned(16))); -__m128i bit_met_p7_m5 __attribute__ ((aligned(16))); -__m128i bit_met_p7_m3 __attribute__ ((aligned(16))); -__m128i bit_met_p7_m1 __attribute__ ((aligned(16))); -__m128i bit_met_p7_p1 __attribute__ ((aligned(16))); -__m128i bit_met_p7_p3 __attribute__ ((aligned(16))); -__m128i bit_met_p7_p5 __attribute__ ((aligned(16))); -__m128i bit_met_p7_p7 __attribute__ ((aligned(16))); - -__m128i y0_p_1_1 __attribute__ ((aligned(16))); -__m128i y0_p_1_3 __attribute__ ((aligned(16))); -__m128i y0_p_1_5 __attribute__ ((aligned(16))); -__m128i y0_p_1_7 __attribute__ ((aligned(16))); -__m128i y0_p_3_1 __attribute__ ((aligned(16))); -__m128i y0_p_3_3 __attribute__ ((aligned(16))); -__m128i y0_p_3_5 __attribute__ ((aligned(16))); -__m128i y0_p_3_7 __attribute__ ((aligned(16))); -__m128i y0_p_5_1 __attribute__ ((aligned(16))); -__m128i y0_p_5_3 __attribute__ ((aligned(16))); -__m128i y0_p_5_5 __attribute__ ((aligned(16))); -__m128i y0_p_5_7 __attribute__ ((aligned(16))); -__m128i y0_p_7_1 __attribute__ ((aligned(16))); -__m128i y0_p_7_3 __attribute__ ((aligned(16))); -__m128i y0_p_7_5 __attribute__ ((aligned(16))); -__m128i y0_p_7_7 __attribute__ ((aligned(16))); -__m128i y0_m_1_1 __attribute__ ((aligned(16))); -__m128i y0_m_1_3 __attribute__ ((aligned(16))); -__m128i y0_m_1_5 __attribute__ ((aligned(16))); -__m128i y0_m_1_7 __attribute__ ((aligned(16))); -__m128i y0_m_3_1 __attribute__ ((aligned(16))); -__m128i y0_m_3_3 __attribute__ ((aligned(16))); -__m128i y0_m_3_5 __attribute__ ((aligned(16))); -__m128i y0_m_3_7 __attribute__ ((aligned(16))); -__m128i y0_m_5_1 __attribute__ ((aligned(16))); -__m128i y0_m_5_3 __attribute__ ((aligned(16))); -__m128i y0_m_5_5 __attribute__ ((aligned(16))); -__m128i y0_m_5_7 __attribute__ ((aligned(16))); -__m128i y0_m_7_1 __attribute__ ((aligned(16))); -__m128i y0_m_7_3 __attribute__ ((aligned(16))); -__m128i y0_m_7_5 __attribute__ ((aligned(16))); -__m128i y0_m_7_7 __attribute__ ((aligned(16))); - -__m128i xmm0 __attribute__ ((aligned(16))); -__m128i xmm1 __attribute__ ((aligned(16))); -__m128i xmm2 __attribute__ ((aligned(16))); -__m128i xmm3 __attribute__ ((aligned(16))); -__m128i xmm4 __attribute__ ((aligned(16))); -__m128i xmm5 __attribute__ ((aligned(16))); -__m128i xmm6 __attribute__ ((aligned(16))); -__m128i xmm7 __attribute__ ((aligned(16))); -__m128i xmm8 __attribute__ ((aligned(16))); - -__m128i y0r __attribute__ ((aligned(16))); -__m128i y0i __attribute__ ((aligned(16))); -__m128i y1r __attribute__ ((aligned(16))); -__m128i y1i __attribute__ ((aligned(16))); -__m128i y2r __attribute__ ((aligned(16))); -__m128i y2i __attribute__ ((aligned(16))); - -__m128i logmax_num_re0 __attribute__ ((aligned(16))); -__m128i logmax_num_im0 __attribute__ ((aligned(16))); -__m128i logmax_den_re0 __attribute__ ((aligned(16))); -__m128i logmax_den_im0 __attribute__ ((aligned(16))); -__m128i logmax_num_re1 __attribute__ ((aligned(16))); -__m128i logmax_num_im1 __attribute__ ((aligned(16))); -__m128i logmax_den_re1 __attribute__ ((aligned(16))); -__m128i logmax_den_im1 __attribute__ ((aligned(16))); - -__m128i tmp_result __attribute__ ((aligned(16))); -__m128i tmp_result2 __attribute__ ((aligned(16))); -__m128i tmp_result3 __attribute__ ((aligned(16))); -__m128i tmp_result4 __attribute__ ((aligned(16))); - - -//============================================================================================== -// Auxiliary Makros - -// calculates psi_a = psi_r*a_r + psi_i*a_i -#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) tmp_result = _mm_mulhi_epi16(psi_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(psi_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); psi_a = _mm_adds_epi16(tmp_result,tmp_result2); - -// calculate interference magnitude -#define interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) tmp_result = _mm_cmplt_epi16(psi,int_ch_mag); tmp_result2 = _mm_xor_si128(tmp_result,(*(__m128i*)&nr_ones[0])); tmp_result = _mm_and_si128(tmp_result,c1); tmp_result2 = _mm_and_si128(tmp_result2,c2); int_mag = _mm_or_si128(tmp_result,tmp_result2); - -// calculate interference magnitude -// tmp_result = nr_ones in shorts corr. to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4 interval x>6 -#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) tmp_result = _mm_cmplt_epi16(psi,int_two_ch_mag); tmp_result3 = _mm_xor_si128(tmp_result,(*(__m128i*)&nr_ones[0])); tmp_result2 = _mm_cmplt_epi16(psi,int_ch_mag); tmp_result = _mm_xor_si128(tmp_result,tmp_result2); tmp_result4 = _mm_cmpgt_epi16(psi,int_three_ch_mag); tmp_result3 = _mm_xor_si128(tmp_result3,tmp_result4); tmp_result = _mm_and_si128(tmp_result,c3); tmp_result2 = _mm_and_si128(tmp_result2,c1); tmp_result3 = _mm_and_si128(tmp_result3,c5); tmp_result4 = _mm_and_si128(tmp_result4,c7); tmp_result = _mm_or_si128(tmp_result,tmp_result2); tmp_result3 = _mm_or_si128(tmp_result3,tmp_result4); a = _mm_or_si128(tmp_result,tmp_result3); - -// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor -#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm_mulhi_epi16(a_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(a_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm_slli_epi16(tmp_result2,1); a_sq = _mm_adds_epi16(tmp_result,tmp_result2); - -// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM -#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm_mulhi_epi16(a_r,a_r); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result = _mm_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm_slli_epi16(tmp_result,3); tmp_result = _mm_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm_slli_epi16(tmp_result,1); tmp_result2 = _mm_mulhi_epi16(a_i,a_i); tmp_result2 = _mm_slli_epi16(tmp_result2,1); tmp_result2 = _mm_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm_slli_epi16(tmp_result2,3); tmp_result2 = _mm_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm_slli_epi16(tmp_result2,1); a_sq = _mm_adds_epi16(tmp_result,tmp_result2); - -#elif defined(__arm__) || defined(__aarch64__) - +__m128i rho_rmi __attribute__((aligned(16))); #endif //============================================================================================== @@ -703,9 +129,9 @@ void nr_dlsch_16qam_llr(NR_DL_FRAME_PARMS *frame_parms, #endif #if defined(__x86_64__) || defined(__i386__) - ch_mag = (__m128i*)&dl_ch_mag[(symbol*nb_rb*12)]; + ch_mag = (__m128i *)dl_ch_mag; #elif defined(__arm__) || defined(__aarch64__) - ch_mag = (int16x8_t*)&dl_ch_mag[(symbol*nb_rb*12)]; + ch_mag = (int16x8_t *)dl_ch_mag; #endif @@ -719,6 +145,7 @@ void nr_dlsch_16qam_llr(NR_DL_FRAME_PARMS *frame_parms, for (i=0; i<len; i++) { #if defined(__x86_64__) || defined(__i386) + __m128i xmm0; xmm0 = _mm_abs_epi16(rxF[i]); xmm0 = _mm_subs_epi16(ch_mag[i],xmm0); @@ -817,6 +244,8 @@ void nr_dlsch_64qam_llr(NR_DL_FRAME_PARMS *frame_parms, for (i=0; i<len2; i++) { #if defined(__x86_64__) || defined(__i386__) + __m128i xmm1, xmm2; + xmm1 = _mm_abs_epi16(rxF[i]); xmm1 = _mm_subs_epi16(ch_mag[i],xmm1); xmm2 = _mm_abs_epi16(xmm1); @@ -940,11 +369,11 @@ void nr_dlsch_256qam_llr(NR_DL_FRAME_PARMS *frame_parms, len2+=((len_mod4==0)?0:1); for (i=0; i<len2; i++) { - xmm1 = _mm_abs_epi16(rxF[i]); + __m128i xmm1 = _mm_abs_epi16(rxF[i]); xmm1 = _mm_subs_epi16(ch_mag[i],xmm1); - xmm2 = _mm_abs_epi16(xmm1); + __m128i xmm2 = _mm_abs_epi16(xmm1); xmm2 = _mm_subs_epi16(ch_magb[i],xmm2); - xmm3 = _mm_abs_epi16(xmm2); + __m128i xmm3 = _mm_abs_epi16(xmm2); xmm3 = _mm_subs_epi16(ch_magr[i], xmm3); llr2[0] = ((short *)&rxF[i])[0]; @@ -1059,8 +488,8 @@ void nr_qpsk_qpsk(short *stream0_in, for (i=0; i<length>>2; i+=2) { // in each iteration, we take 8 complex samples #if defined(__x86_64__) || defined(__i386__) - xmm0 = rho01_128i[i]; // 4 symbols - xmm1 = rho01_128i[i+1]; + __m128i xmm0 = rho01_128i[i]; // 4 symbols + __m128i xmm1 = rho01_128i[i + 1]; // put (rho_r + rho_i)/2sqrt2 in rho_rpi // put (rho_r - rho_i)/2sqrt2 in rho_rmi @@ -1073,10 +502,10 @@ void nr_qpsk_qpsk(short *stream0_in, xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + __m128i xmm2 = _mm_unpacklo_epi64(xmm0, xmm1); // Re(rho) + __m128i xmm3 = _mm_unpackhi_epi64(xmm0, xmm1); // Im(rho) + __m128i rho_rpi = _mm_adds_epi16(xmm2, xmm3); // rho = Re(rho) + Im(rho) + __m128i rho_rmi = _mm_subs_epi16(xmm2, xmm3); // rho* = Re(rho) - Im(rho) // divide by sqrt(8), no shift needed ONE_OVER_SQRT_8 = Q1.16 rho_rpi = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_8); @@ -1100,11 +529,11 @@ void nr_qpsk_qpsk(short *stream0_in, xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); + __m128i y0r = _mm_unpacklo_epi64(xmm0, xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + __m128i y0i = _mm_unpackhi_epi64(xmm0, xmm1); - y0r_over2 = _mm_srai_epi16(y0r,1); // divide by 2 - y0i_over2 = _mm_srai_epi16(y0i,1); // divide by 2 + __m128i y0r_over2 = _mm_srai_epi16(y0r, 1); // divide by 2 + __m128i y0i_over2 = _mm_srai_epi16(y0i, 1); // divide by 2 #elif defined(__arm__) || defined(__aarch64__) @@ -1122,11 +551,11 @@ void nr_qpsk_qpsk(short *stream0_in, xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + __m128i y1r = _mm_unpacklo_epi64(xmm0, xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + __m128i y1i = _mm_unpackhi_epi64(xmm0, xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - y1r_over2 = _mm_srai_epi16(y1r,1); // divide by 2 - y1i_over2 = _mm_srai_epi16(y1i,1); // divide by 2 + __m128i y1r_over2 = _mm_srai_epi16(y1r, 1); // divide by 2 + __m128i y1i_over2 = _mm_srai_epi16(y1i, 1); // divide by 2 // Compute the terms for the LLR of first bit @@ -1138,7 +567,7 @@ void nr_qpsk_qpsk(short *stream0_in, xmm2 = _mm_adds_epi16(A,y0i_over2); // = |y1r/2 - rho/sqrt(8)| + y0i/2 xmm3 = _mm_subs_epi16(y1i_over2,rho_rmi); B = _mm_abs_epi16(xmm3); // B = |y1i/2 - rho*/sqrt(8)| - logmax_num_re0 = _mm_adds_epi16(B,xmm2); // = |y1r/2 - rho/sqrt(8)|+|y1i/2 - rho*/sqrt(8)| + y0i/2 + __m128i logmax_num_re0 = _mm_adds_epi16(B, xmm2); // = |y1r/2 - rho/sqrt(8)|+|y1i/2 - rho*/sqrt(8)| + y0i/2 // 2 term for numerator of LLR xmm3 = _mm_subs_epi16(y1r_over2,rho_rmi); @@ -1155,7 +584,7 @@ void nr_qpsk_qpsk(short *stream0_in, xmm2 = _mm_adds_epi16(E,y0i_over2); // = |y1r/2 + rho*/4| + y0i/2 xmm3 = _mm_subs_epi16(y1i_over2,rho_rpi); F = _mm_abs_epi16(xmm3); // F = |y1i/2 - rho/4| - logmax_den_re0 = _mm_adds_epi16(F,xmm2); // = |y1r/2 + rho*/4| + |y1i/2 - rho/4| + y0i/2 + __m128i logmax_den_re0 = _mm_adds_epi16(F, xmm2); // = |y1r/2 + rho*/4| + |y1i/2 - rho/4| + y0i/2 // 2 term for denominator of LLR xmm3 = _mm_adds_epi16(y1r_over2,rho_rpi); @@ -1170,7 +599,7 @@ void nr_qpsk_qpsk(short *stream0_in, // 1 term for nominator of LLR xmm2 = _mm_adds_epi16(A,y0r_over2); - logmax_num_im0 = _mm_adds_epi16(B,xmm2); // = |y1r/2 - rho/4| + |y1i/2 - rho*/4| + y0r/2 + __m128i logmax_num_im0 = _mm_adds_epi16(B, xmm2); // = |y1r/2 - rho/4| + |y1i/2 - rho*/4| + y0r/2 // 2 term for nominator of LLR xmm2 = _mm_subs_epi16(E,y0r_over2); @@ -1180,7 +609,7 @@ void nr_qpsk_qpsk(short *stream0_in, // 1 term for denominator of LLR xmm2 = _mm_adds_epi16(C,y0r_over2); - logmax_den_im0 = _mm_adds_epi16(D,xmm2); // = |y1r/2 - rho*/4| + |y1i/2 + rho/4| - y0r/2 + __m128i logmax_den_im0 = _mm_adds_epi16(D, xmm2); // = |y1r/2 - rho*/4| + |y1i/2 + rho/4| - y0r/2 xmm2 = _mm_subs_epi16(G,y0r_over2); xmm2 = _mm_adds_epi16(xmm2,H); // = |y1r/2 + rho/4| + |y1i/2 + rho*/4| - y0r/2 @@ -1210,6953 +639,3 @@ void nr_qpsk_qpsk(short *stream0_in, _m_empty(); #endif } - -/* -#if defined(__x86_64__) || defined(__i386__) -__m128i ONE_OVER_SQRT_2 __attribute__((aligned(16))); -__m128i ONE_OVER_SQRT_10 __attribute__((aligned(16))); -__m128i THREE_OVER_SQRT_10 __attribute__((aligned(16))); -__m128i ONE_OVER_SQRT_10_Q15 __attribute__((aligned(16))); -__m128i SQRT_10_OVER_FOUR __attribute__((aligned(16))); -__m128i ch_mag_int; -#endif -*/ -void nr_qpsk_qam16(int16_t *stream0_in, - int16_t *stream1_in, - int16_t *ch_mag_i, - int16_t *stream0_out, - int16_t *rho01, - int32_t length - ) -{ - /* - This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1) assuming that both symbols are QPSK. It can be used for both MU-MIMO interference-aware receiver or for SU-MIMO receivers. - - Parameters: - stream0_in = Matched filter output y0' = (h0*g0)*y0 - stream1_in = Matched filter output y1' = (h0*g1)*y0 - stream0_out = LLRs - rho01 = Correlation between the two effective channels \rho_{10} = (h1*g1)*(h0*g0) - length = number of resource elements - */ - -#if defined(__x86_64__) || defined(__i386__) - __m128i *rho01_128i = (__m128i *)rho01; - __m128i *stream0_128i_in = (__m128i *)stream0_in; - __m128i *stream1_128i_in = (__m128i *)stream1_in; - __m128i *stream0_128i_out = (__m128i *)stream0_out; - __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; - __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) - __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15) - __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) - __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) - __m128i ch_mag_int __attribute__((aligned(16))); -#elif defined(__arm__) || defined(__aarch64__) - int16x8_t *rho01_128i = (int16x8_t *)rho01; - int16x8_t *stream0_128i_in = (int16x8_t *)stream0_in; - int16x8_t *stream1_128i_in = (int16x8_t *)stream1_in; - int16x8_t *stream0_128i_out = (int16x8_t *)stream0_out; - int16x8_t *ch_mag_128i_i = (int16x8_t *)ch_mag_i; - int16x8_t ONE_OVER_SQRT_2 = vdupq_n_s16(23170); // round(1/sqrt(2)*2^15) - int16x8_t ONE_OVER_SQRT_10_Q15 = vdupq_n_s16(10362); // round(1/sqrt(10)*2^15) - int16x8_t THREE_OVER_SQRT_10 = vdupq_n_s16(31086); // round(3/sqrt(10)*2^15) - int16x8_t SQRT_10_OVER_FOUR = vdupq_n_s16(25905); // round(sqrt(10)/4*2^15) - int16x8_t ch_mag_int __attribute__((aligned(16))); -#endif - -#ifdef DEBUG_LLR - print_shorts2("rho01_128i:\n",rho01_128i); -#endif - - int i; - - - for (i=0; i<length>>2; i+=2) { - // in each iteration, we take 8 complex samples - -#if defined(__x86_64__) || defined(__i386__) - - xmm0 = rho01_128i[i]; // 4 symbols - xmm1 = rho01_128i[i+1]; - - // put (rho_r + rho_i)/2sqrt2 in rho_rpi - // put (rho_r - rho_i)/2sqrt2 in rho_rmi - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) - - // divide by sqrt(2) - rho_rpi = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_2); - rho_rmi = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_2); - rho_rpi = _mm_slli_epi16(rho_rpi,1); - rho_rmi = _mm_slli_epi16(rho_rmi,1); - - // Compute LLR for first bit of stream 0 - - // Compute real and imaginary parts of MF output for stream 0 - xmm0 = stream0_128i_in[i]; - xmm1 = stream0_128i_in[i+1]; - - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); - - // divide by sqrt(2) - y0r_over2 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_2); - y0i_over2 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_2); - y0r_over2 = _mm_slli_epi16(y0r,1); - y0i_over2 = _mm_slli_epi16(y0i,1); - - y0_p_1_1 = _mm_adds_epi16(y0r_over2, y0i_over2); - y0_m_1_1 = _mm_subs_epi16(y0r_over2, y0i_over2); - - // Compute real and imaginary parts of MF output for stream 1 - xmm0 = stream1_128i_in[i]; - xmm1 = stream1_128i_in[i+1]; - - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - - xmm0 = _mm_setzero_si128(); // ZERO - - // compute psi - xmm3 = _mm_subs_epi16(y1r,rho_rpi); - psi_r_p1_p1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_subs_epi16(y1i,rho_rmi); - psi_i_p1_p1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_subs_epi16(y1r,rho_rmi); - psi_r_p1_m1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_adds_epi16(y1i,rho_rpi); - psi_i_p1_m1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_adds_epi16(y1r,rho_rmi); - psi_r_m1_p1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_subs_epi16(y1i,rho_rpi); - psi_i_m1_p1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_adds_epi16(y1r,rho_rpi); - psi_r_m1_m1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_adds_epi16(y1i,rho_rmi); - psi_i_m1_m1 = _mm_abs_epi16(xmm3); - - // Rearrange interfering channel magnitudes - xmm2 = ch_mag_128i_i[i]; - xmm3 = ch_mag_128i_i[i+1]; - - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - - ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); - - // calculate optimal interference amplitudes - interference_abs_epi16(psi_r_p1_p1 , ch_mag_int, a_r_p1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_p1 , ch_mag_int, a_i_p1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_m1 , ch_mag_int, a_r_p1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_m1 , ch_mag_int, a_i_p1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_p1 , ch_mag_int, a_r_m1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_p1 , ch_mag_int, a_i_m1_p1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_m1 , ch_mag_int, a_r_m1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_m1 , ch_mag_int, a_i_m1_m1 , ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - - // prodsum - prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); - prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); - prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); - prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); - - // squares - square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1); - square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1); - square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1); - square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1); - - // Computing Metrics - xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); - bit_met_p1_p1 = _mm_adds_epi16(xmm0, y0_p_1_1); - - xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); - bit_met_p1_m1 = _mm_adds_epi16(xmm0, y0_m_1_1); - - xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); - bit_met_m1_p1 = _mm_subs_epi16(xmm0, y0_m_1_1); - - xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); - bit_met_m1_m1 = _mm_subs_epi16(xmm0, y0_p_1_1); - - // MSB - logmax_num_re0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_m1); // bit=0 - logmax_den_re0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_m1); // bit=1 - - y0r = _mm_subs_epi16(logmax_num_re0,logmax_den_re0); - - // LSB - logmax_num_im0 = _mm_max_epi16(bit_met_p1_p1,bit_met_m1_p1); // bit=0 - logmax_den_im0 = _mm_max_epi16(bit_met_p1_m1,bit_met_m1_m1); // bit=1 - - y0i = _mm_subs_epi16(logmax_num_im0,logmax_den_im0); - - stream0_128i_out[i] = _mm_unpacklo_epi16(y0r,y0i); // = [L1(1), L2(1), L1(2), L2(2)] - - if (i<((length>>1) - 1)) // false if only 2 REs remain - stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i); - -#elif defined(__arm__) || defined(__aarch64__) - -#endif - } - -#if defined(__x86_64__) || defined(__i386__) - _mm_empty(); - _m_empty(); -#endif -} - -/* -__m128i ONE_OVER_SQRT_2_42 __attribute__((aligned(16))); -__m128i THREE_OVER_SQRT_2_42 __attribute__((aligned(16))); -__m128i FIVE_OVER_SQRT_2_42 __attribute__((aligned(16))); -__m128i SEVEN_OVER_SQRT_2_42 __attribute__((aligned(16))); - -__m128i ch_mag_int_with_sigma2 __attribute__((aligned(16))); -__m128i two_ch_mag_int_with_sigma2 __attribute__((aligned(16))); -__m128i three_ch_mag_int_with_sigma2 __attribute__((aligned(16))); -__m128i SQRT_42_OVER_FOUR __attribute__((aligned(16))); -*/ -void nr_qpsk_qam64(short *stream0_in, - short *stream1_in, - short *ch_mag_i, - short *stream0_out, - short *rho01, - int length - ) -{ - - /* - This function computes the LLRs of stream 0 (s_0) in presence of the interfering stream 1 (s_1) assuming that both symbols are QPSK. It can be used for both MU-MIMO interference-aware receiver or for SU-MIMO receivers. - - Parameters: - stream0_in = Matched filter output y0' = (h0*g0)*y0 - stream1_in = Matched filter output y1' = (h0*g1)*y0 - stream0_out = LLRs - rho01 = Correlation between the two effective channels \rho_{10} = (h1*g1)*(h0*g0) - length = number of resource elements - */ - -#if defined(__x86_64__) || defined(__i386__) - __m128i *rho01_128i = (__m128i *)rho01; - __m128i *stream0_128i_in = (__m128i *)stream0_in; - __m128i *stream1_128i_in = (__m128i *)stream1_in; - __m128i *stream0_128i_out = (__m128i *)stream0_out; - __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; - __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) - __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15) - __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15) - __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15) - __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15) - __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3.1 - __m128i ch_mag_int; - __m128i ch_mag_int_with_sigma2; - __m128i two_ch_mag_int_with_sigma2; - __m128i three_ch_mag_int_with_sigma2; -#elif defined(__arm__) || defined(__aarch64__) - -#endif - -#ifdef DEBUG_LLR - print_shorts2("rho01_128i:\n",rho01_128i); -#endif - - int i; - - - for (i=0; i<length>>2; i+=2) { - // in each iteration, we take 8 complex samples - -#if defined(__x86_64__) || defined(__i386__) - - xmm0 = rho01_128i[i]; // 4 symbols - xmm1 = rho01_128i[i+1]; - - // put (rho_r + rho_i)/sqrt2 in rho_rpi - // put (rho_r - rho_i)/sqrt2 in rho_rmi - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) - - // divide by sqrt(2) - rho_rpi = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_2); - rho_rmi = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_2); - rho_rpi = _mm_slli_epi16(rho_rpi,1); - rho_rmi = _mm_slli_epi16(rho_rmi,1); - - // Compute LLR for first bit of stream 0 - - // Compute real and imaginary parts of MF output for stream 0 - xmm0 = stream0_128i_in[i]; - xmm1 = stream0_128i_in[i+1]; - - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); - - // divide by sqrt(2) - y0r_over2 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_2); - y0i_over2 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_2); - y0r_over2 = _mm_slli_epi16(y0r,1); - y0i_over2 = _mm_slli_epi16(y0i,1); - - y0_p_1_1 = _mm_adds_epi16(y0r_over2, y0i_over2); - y0_m_1_1 = _mm_subs_epi16(y0r_over2, y0i_over2); - - // Compute real and imaginary parts of MF output for stream 1 - xmm0 = stream1_128i_in[i]; - xmm1 = stream1_128i_in[i+1]; - - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - - xmm0 = _mm_setzero_si128(); // ZERO - - // compute psi - xmm3 = _mm_subs_epi16(y1r,rho_rpi); - psi_r_p1_p1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_subs_epi16(y1i,rho_rmi); - psi_i_p1_p1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_subs_epi16(y1r,rho_rmi); - psi_r_p1_m1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_adds_epi16(y1i,rho_rpi); - psi_i_p1_m1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_adds_epi16(y1r,rho_rmi); - psi_r_m1_p1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_subs_epi16(y1i,rho_rpi); - psi_i_m1_p1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_adds_epi16(y1r,rho_rpi); - psi_r_m1_m1 = _mm_abs_epi16(xmm3); - xmm3 = _mm_adds_epi16(y1i,rho_rmi); - psi_i_m1_m1 = _mm_abs_epi16(xmm3); - - // Rearrange interfering channel magnitudes - xmm2 = ch_mag_128i_i[i]; - xmm3 = ch_mag_128i_i[i+1]; - - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - - ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); - ch_mag_int_with_sigma2 = _mm_srai_epi16(ch_mag_int, 1); // *2 - two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 - three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 - - interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - - // prodsum - prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); - prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); - prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); - prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); - - // Multiply by sqrt(2) - psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); - psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2); - psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); - psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2); - psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); - psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2); - psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); - psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2); - - square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1); - square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1); - square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1); - square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1); - - // Computing Metrics - xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); - bit_met_p1_p1 = _mm_adds_epi16(xmm0, y0_p_1_1); - - xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); - bit_met_p1_m1 = _mm_adds_epi16(xmm0, y0_m_1_1); - - xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); - bit_met_m1_p1 = _mm_subs_epi16(xmm0, y0_m_1_1); - - xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); - bit_met_m1_m1 = _mm_subs_epi16(xmm0, y0_p_1_1); - - // MSB - logmax_num_re0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_m1); // bit=0 - logmax_den_re0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_m1); // bit=1 - - y0r = _mm_subs_epi16(logmax_num_re0,logmax_den_re0); - - // LSB - logmax_num_im0 = _mm_max_epi16(bit_met_p1_p1,bit_met_m1_p1); // bit=0 - logmax_den_im0 = _mm_max_epi16(bit_met_p1_m1,bit_met_m1_m1); // bit=1 - - y0i = _mm_subs_epi16(logmax_num_im0,logmax_den_im0); - - stream0_128i_out[i] = _mm_unpacklo_epi16(y0r,y0i); // = [L1(1), L2(1), L1(2), L2(2)] - - if (i<((length>>1) - 1)) // false if only 2 REs remain - stream0_128i_out[i+1] = _mm_unpackhi_epi16(y0r,y0i); - -#elif defined(__arm__) || defined(__aarch64__) - -#endif - } - -#if defined(__x86_64__) || defined(__i386__) - _mm_empty(); - _m_empty(); -#endif -} - - -//---------------------------------------------------------------------------------------------- -// 16-QAM -//---------------------------------------------------------------------------------------------- - -/* -__m128i ONE_OVER_TWO_SQRT_10 __attribute__((aligned(16))); -__m128i NINE_OVER_TWO_SQRT_10 __attribute__((aligned(16))); - -__m128i y0r_over_sqrt10 __attribute__ ((aligned(16))); -__m128i y0i_over_sqrt10 __attribute__ ((aligned(16))); -__m128i y0r_three_over_sqrt10 __attribute__ ((aligned(16))); -__m128i y0i_three_over_sqrt10 __attribute__ ((aligned(16))); - -__m128i ch_mag_des __attribute__((aligned(16))); -__m128i ch_mag_over_10 __attribute__ ((aligned(16))); -__m128i ch_mag_over_2 __attribute__ ((aligned(16))); -__m128i ch_mag_9_over_10 __attribute__ ((aligned(16))); -*/ - -void nr_qam16_qpsk(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *stream0_out, - short *rho01, - int length - ) -{ - - /* - Author: Sebastian Wagner - Date: 2012-06-04 - - Input: - stream0_in: MF filter for 1st stream, i.e., y0=h0'*y - stream!_in: MF filter for 2nd stream, i.e., y1=h1'*y - ch_mag: 2*h0/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - ch_mag_i: 2*h1/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - rho01: Channel cross correlation, i.e., h1'*h0 - - Output: - stream0_out: output LLRs for 1st stream - */ - -#if defined(__x86_64__) || defined(__i386__) - __m128i *rho01_128i = (__m128i *)rho01; - __m128i *stream0_128i_in = (__m128i *)stream0_in; - __m128i *stream1_128i_in = (__m128i *)stream1_in; - __m128i *stream0_128i_out = (__m128i *)stream0_out; - __m128i *ch_mag_128i = (__m128i *)ch_mag; - __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) - __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16) - __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) - __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) - __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16) - __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14) - __m128i y0r_over_sqrt10; - __m128i y0i_over_sqrt10; - __m128i y0r_three_over_sqrt10; - __m128i y0i_three_over_sqrt10; - - __m128i ch_mag_des; - __m128i ch_mag_over_10; - __m128i ch_mag_over_2; - __m128i ch_mag_9_over_10; -#elif defined(__arm__) || defined(__aarch64__) - -#endif - - int i; - - - for (i=0; i<length>>2; i+=2) { - // In one iteration, we deal with 8 REs - -#if defined(__x86_64__) || defined(__i386__) - // Get rho - xmm0 = rho01_128i[i]; - xmm1 = rho01_128i[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) - - // Compute the different rhos - rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10); - rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10); - rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10); - rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10); - rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1); - rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1); - - xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho) - xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho) - xmm5 = _mm_slli_epi16(xmm5,1); - - rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5); - rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5); - - xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho) - xmm7 = _mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho) - xmm6 = _mm_slli_epi16(xmm6,1); - - rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7); - rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7); - - // Rearrange interfering MF output - xmm0 = stream1_128i_in[i]; - xmm1 = stream1_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - - xmm0 = _mm_setzero_si128(); // ZERO - xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r - psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r| - - xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r); - psi_r_p1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i); - psi_i_p1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r); - psi_r_p1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r); - psi_r_p1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i); - psi_i_p1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r); - psi_r_p3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r); - psi_r_p3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i); - psi_i_p3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r); - psi_r_p3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r); - psi_r_p3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i); - psi_i_p3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i); - psi_i_m1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i); - psi_i_m1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i); - psi_i_m3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i); - psi_i_m3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i); - psi_i_p1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i); - psi_i_p1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i); - psi_i_p3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i); - psi_i_p3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r); - psi_r_m1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r); - psi_r_m1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r); - psi_r_m3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r); - psi_r_m3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1); - psi_r_m1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3); - psi_r_m1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1); - psi_i_m1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_3_1); - psi_i_m1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_3_1); - psi_r_m3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3); - psi_r_m3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3); - psi_i_m3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3); - psi_i_m3_m3 = _mm_abs_epi16(xmm2); - - // Rearrange desired MF output - xmm0 = stream0_128i_in[i]; - xmm1 = stream0_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); - - // Rearrange desired channel magnitudes - xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) - xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - - ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10)) - - // Scale MF output of desired signal - y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10); - y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10); - y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10); - y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10); - y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1); - y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1); - - // Compute necessary combination of required terms - y0_p_1_1 = _mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10); - y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10); - - y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); - y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); - - y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); - y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); - - y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); - y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); - - // Add psi - psi_a_p1_p1 = _mm_adds_epi16(psi_r_p1_p1 ,psi_i_p1_p1); - psi_a_p1_p3 = _mm_adds_epi16(psi_r_p1_p3 ,psi_i_p1_p3); - psi_a_p3_p1 = _mm_adds_epi16(psi_r_p3_p1 ,psi_i_p3_p1); - psi_a_p3_p3 = _mm_adds_epi16(psi_r_p3_p3 ,psi_i_p3_p3); - psi_a_p1_m1 = _mm_adds_epi16(psi_r_p1_m1 ,psi_i_p1_m1); - psi_a_p1_m3 = _mm_adds_epi16(psi_r_p1_m3 ,psi_i_p1_m3); - psi_a_p3_m1 = _mm_adds_epi16(psi_r_p3_m1 ,psi_i_p3_m1); - psi_a_p3_m3 = _mm_adds_epi16(psi_r_p3_m3 ,psi_i_p3_m3); - psi_a_m1_p1 = _mm_adds_epi16(psi_r_m1_p1 ,psi_i_m1_p1); - psi_a_m1_p3 = _mm_adds_epi16(psi_r_m1_p3 ,psi_i_m1_p3); - psi_a_m3_p1 = _mm_adds_epi16(psi_r_m3_p1 ,psi_i_m3_p1); - psi_a_m3_p3 = _mm_adds_epi16(psi_r_m3_p3 ,psi_i_m3_p3); - psi_a_m1_m1 = _mm_adds_epi16(psi_r_m1_m1 ,psi_i_m1_m1); - psi_a_m1_m3 = _mm_adds_epi16(psi_r_m1_m3 ,psi_i_m1_m3); - psi_a_m3_m1 = _mm_adds_epi16(psi_r_m3_m1 ,psi_i_m3_m1); - psi_a_m3_m3 = _mm_adds_epi16(psi_r_m3_m3 ,psi_i_m3_m3); - - // scale by sqrt(2) - psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1,ONE_OVER_SQRT_2); - psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1,1); - psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3,ONE_OVER_SQRT_2); - psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3,1); - psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1,ONE_OVER_SQRT_2); - psi_a_p3_p1 = _mm_slli_epi16(psi_a_p3_p1,1); - psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3,ONE_OVER_SQRT_2); - psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3,1); - - psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1,ONE_OVER_SQRT_2); - psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1,1); - psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3,ONE_OVER_SQRT_2); - psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3,1); - psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1,ONE_OVER_SQRT_2); - psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1,1); - psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3,ONE_OVER_SQRT_2); - psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3,1); - - psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1,ONE_OVER_SQRT_2); - psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1,1); - psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3,ONE_OVER_SQRT_2); - psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3,1); - psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1,ONE_OVER_SQRT_2); - psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1,1); - psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3,ONE_OVER_SQRT_2); - psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3,1); - - psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1,ONE_OVER_SQRT_2); - psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1,1); - psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3,ONE_OVER_SQRT_2); - psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3,1); - psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1,ONE_OVER_SQRT_2); - psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1,1); - psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3,ONE_OVER_SQRT_2); - psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3,1); - - // Computing different multiples of channel norms - ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10); - ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR); - ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1); - ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10); - ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2); - - // Computing Metrics - xmm1 = _mm_adds_epi16(psi_a_p1_p1, y0_p_1_1); - bit_met_p1_p1= _mm_subs_epi16(xmm1, ch_mag_over_10); - - xmm1 = _mm_adds_epi16(psi_a_p1_p3, y0_p_1_3); - bit_met_p1_p3= _mm_subs_epi16(xmm1, ch_mag_over_2); - - xmm1 = _mm_adds_epi16(psi_a_p1_m1, y0_m_1_1); - bit_met_p1_m1= _mm_subs_epi16(xmm1, ch_mag_over_10); - - xmm1 = _mm_adds_epi16(psi_a_p1_m3, y0_m_1_3); - bit_met_p1_m3= _mm_subs_epi16(xmm1, ch_mag_over_2); - - xmm1 = _mm_adds_epi16(psi_a_p3_p1, y0_p_3_1); - bit_met_p3_p1= _mm_subs_epi16(xmm1, ch_mag_over_2); - - xmm1 = _mm_adds_epi16(psi_a_p3_p3, y0_p_3_3); - bit_met_p3_p3= _mm_subs_epi16(xmm1, ch_mag_9_over_10); - - xmm1 = _mm_adds_epi16(psi_a_p3_m1, y0_m_3_1); - bit_met_p3_m1= _mm_subs_epi16(xmm1, ch_mag_over_2); - - xmm1 = _mm_adds_epi16(psi_a_p3_m3, y0_m_3_3); - bit_met_p3_m3= _mm_subs_epi16(xmm1, ch_mag_9_over_10); - - xmm1 = _mm_subs_epi16(psi_a_m1_p1, y0_m_1_1); - bit_met_m1_p1= _mm_subs_epi16(xmm1, ch_mag_over_10); - - xmm1 = _mm_subs_epi16(psi_a_m1_p3, y0_m_1_3); - bit_met_m1_p3= _mm_subs_epi16(xmm1, ch_mag_over_2); - - xmm1 = _mm_subs_epi16(psi_a_m1_m1, y0_p_1_1); - bit_met_m1_m1= _mm_subs_epi16(xmm1, ch_mag_over_10); - - xmm1 = _mm_subs_epi16(psi_a_m1_m3, y0_p_1_3); - bit_met_m1_m3= _mm_subs_epi16(xmm1, ch_mag_over_2); - - xmm1 = _mm_subs_epi16(psi_a_m3_p1, y0_m_3_1); - bit_met_m3_p1= _mm_subs_epi16(xmm1, ch_mag_over_2); - - xmm1 = _mm_subs_epi16(psi_a_m3_p3, y0_m_3_3); - bit_met_m3_p3= _mm_subs_epi16(xmm1, ch_mag_9_over_10); - - xmm1 = _mm_subs_epi16(psi_a_m3_m1, y0_p_3_1); - bit_met_m3_m1= _mm_subs_epi16(xmm1, ch_mag_over_2); - - xmm1 = _mm_subs_epi16(psi_a_m3_m3, y0_p_3_3); - bit_met_m3_m3= _mm_subs_epi16(xmm1, ch_mag_9_over_10); - - // LLR of the first bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); - xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_re0= _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); - xmm1 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4,xmm5); - - // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)] - y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0); - - // LLR of the second bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); - xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_re1 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); - xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); - xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_re1 = _mm_max_epi16(xmm4,xmm5); - - // LLR of second bit [L2(1), L2(2), L2(3), L2(4)] - y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1); - - // LLR of the third bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); - xmm1 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); - xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_im0 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); - xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_im0 = _mm_max_epi16(xmm4,xmm5); - - // LLR of third bit [L3(1), L3(2), L3(3), L3(4)] - y0i = _mm_subs_epi16(logmax_den_im0,logmax_num_im0); - - // LLR of the fourth bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); - xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); - xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_im1 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); - xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_im1 = _mm_max_epi16(xmm4,xmm5); - - // LLR of fourth bit [L4(1), L4(2), L4(3), L4(4)] - y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1); - - // Pack LLRs in output - // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)] - xmm0 = _mm_unpacklo_epi16(y0r,y1r); - // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)] - xmm1 = _mm_unpackhi_epi16(y0r,y1r); - // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)] - xmm2 = _mm_unpacklo_epi16(y0i,y1i); - // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)] - xmm3 = _mm_unpackhi_epi16(y0i,y1i); - - stream0_128i_out[2*i+0] = _mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs - stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2); - stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3); - stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3); - -#elif defined(__arm__) || defined(__aarch64__) - -#endif - } - -#if defined(__x86_64__) || defined(__i386__) - _mm_empty(); - _m_empty(); -#endif - -} - -void nr_qam16_qam16(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *ch_mag_i, - short *stream0_out, - short *rho01, - int length - ) -{ - - /* - Author: Sebastian Wagner - Date: 2012-06-04 - - Input: - stream0_in: MF filter for 1st stream, i.e., y0=h0'*y - stream!_in: MF filter for 2nd stream, i.e., y1=h1'*y - ch_mag: 2*h0/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - ch_mag_i: 2*h1/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - rho01: Channel cross correlation, i.e., h1'*h0 - - Output: - stream0_out: output LLRs for 1st stream - */ -#if defined(__x86_64__) || defined(__i386__) - __m128i *rho01_128i = (__m128i *)rho01; - __m128i *stream0_128i_in = (__m128i *)stream0_in; - __m128i *stream1_128i_in = (__m128i *)stream1_in; - __m128i *stream0_128i_out = (__m128i *)stream0_out; - __m128i *ch_mag_128i = (__m128i *)ch_mag; - __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; - - - - __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16) - __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15) - __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) - __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) - __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16) - __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14) - __m128i ch_mag_des,ch_mag_int; - __m128i y0r_over_sqrt10; - __m128i y0i_over_sqrt10; - __m128i y0r_three_over_sqrt10; - __m128i y0i_three_over_sqrt10; - __m128i ch_mag_over_10; - __m128i ch_mag_over_2; - __m128i ch_mag_9_over_10; -#elif defined(__arm__) || defined(__aarch64__) - -#endif - - int i; - - for (i=0; i<length>>2; i+=2) { - // In one iteration, we deal with 8 REs - -#if defined(__x86_64__) || defined(__i386__) - // Get rho - xmm0 = rho01_128i[i]; - xmm1 = rho01_128i[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) - - // Compute the different rhos - rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10); - rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10); - rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10); - rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10); - rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1); - rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1); - - xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho) - xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho) - xmm5 = _mm_slli_epi16(xmm5,1); - - rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5); - rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5); - - xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho) - xmm7 = _mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho) - xmm6 = _mm_slli_epi16(xmm6,1); - - rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7); - rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7); - - // Rearrange interfering MF output - xmm0 = stream1_128i_in[i]; - xmm1 = stream1_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - - xmm0 = _mm_setzero_si128(); // ZERO - xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r - psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r| - - xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r); - psi_r_p1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i); - psi_i_p1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r); - psi_r_p1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r); - psi_r_p1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i); - psi_i_p1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r); - psi_r_p3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r); - psi_r_p3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i); - psi_i_p3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r); - psi_r_p3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r); - psi_r_p3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i); - psi_i_p3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i); - psi_i_m1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i); - psi_i_m1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i); - psi_i_m3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i); - psi_i_m3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i); - psi_i_p1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i); - psi_i_p1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i); - psi_i_p3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i); - psi_i_p3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r); - psi_r_m1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r); - psi_r_m1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r); - psi_r_m3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r); - psi_r_m3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1); - psi_r_m1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3); - psi_r_m1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1); - psi_i_m1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_3_1); - psi_i_m1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_3_1); - psi_r_m3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3); - psi_r_m3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3); - psi_i_m3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3); - psi_i_m3_m3 = _mm_abs_epi16(xmm2); - - // Rearrange desired MF output - xmm0 = stream0_128i_in[i]; - xmm1 = stream0_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); - - // Rearrange desired channel magnitudes - xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) - xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - - ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10)) - - // Rearrange interfering channel magnitudes - xmm2 = ch_mag_128i_i[i]; - xmm3 = ch_mag_128i_i[i+1]; - - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - - ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); - - // Scale MF output of desired signal - y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10); - y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10); - y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10); - y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10); - y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1); - y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1); - - // Compute necessary combination of required terms - y0_p_1_1 = _mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10); - y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10); - - y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); - y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); - - y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); - y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); - - y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); - y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); - - // Compute optimal interfering symbol magnitude - interference_abs_epi16(psi_r_p1_p1 ,ch_mag_int,a_r_p1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_p1 ,ch_mag_int,a_i_p1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_p3 ,ch_mag_int,a_r_p1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_p3 ,ch_mag_int,a_i_p1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_m1 ,ch_mag_int,a_r_p1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_m1 ,ch_mag_int,a_i_p1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_m3 ,ch_mag_int,a_r_p1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_m3 ,ch_mag_int,a_i_p1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_p1 ,ch_mag_int,a_r_p3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_p1 ,ch_mag_int,a_i_p3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_p3 ,ch_mag_int,a_r_p3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_p3 ,ch_mag_int,a_i_p3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_m1 ,ch_mag_int,a_r_p3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_m1 ,ch_mag_int,a_i_p3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_m3 ,ch_mag_int,a_r_p3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_m3 ,ch_mag_int,a_i_p3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_p1 ,ch_mag_int,a_r_m1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_p1 ,ch_mag_int,a_i_m1_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_p3 ,ch_mag_int,a_r_m1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_p3 ,ch_mag_int,a_i_m1_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_m1 ,ch_mag_int,a_r_m1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_m1 ,ch_mag_int,a_i_m1_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_m3 ,ch_mag_int,a_r_m1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_m3 ,ch_mag_int,a_i_m1_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_p1 ,ch_mag_int,a_r_m3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_p1 ,ch_mag_int,a_i_m3_p1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_p3 ,ch_mag_int,a_r_m3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_p3 ,ch_mag_int,a_i_m3_p3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_m1 ,ch_mag_int,a_r_m3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_m1 ,ch_mag_int,a_i_m3_m1,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_m3 ,ch_mag_int,a_r_m3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_m3 ,ch_mag_int,a_i_m3_m3,ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - - // Calculation of groups of two terms in the bit metric involving product of psi and interference magnitude - prodsum_psi_a_epi16(psi_r_p1_p1,a_r_p1_p1,psi_i_p1_p1,a_i_p1_p1,psi_a_p1_p1); - prodsum_psi_a_epi16(psi_r_p1_p3,a_r_p1_p3,psi_i_p1_p3,a_i_p1_p3,psi_a_p1_p3); - prodsum_psi_a_epi16(psi_r_p3_p1,a_r_p3_p1,psi_i_p3_p1,a_i_p3_p1,psi_a_p3_p1); - prodsum_psi_a_epi16(psi_r_p3_p3,a_r_p3_p3,psi_i_p3_p3,a_i_p3_p3,psi_a_p3_p3); - prodsum_psi_a_epi16(psi_r_p1_m1,a_r_p1_m1,psi_i_p1_m1,a_i_p1_m1,psi_a_p1_m1); - prodsum_psi_a_epi16(psi_r_p1_m3,a_r_p1_m3,psi_i_p1_m3,a_i_p1_m3,psi_a_p1_m3); - prodsum_psi_a_epi16(psi_r_p3_m1,a_r_p3_m1,psi_i_p3_m1,a_i_p3_m1,psi_a_p3_m1); - prodsum_psi_a_epi16(psi_r_p3_m3,a_r_p3_m3,psi_i_p3_m3,a_i_p3_m3,psi_a_p3_m3); - prodsum_psi_a_epi16(psi_r_m1_p1,a_r_m1_p1,psi_i_m1_p1,a_i_m1_p1,psi_a_m1_p1); - prodsum_psi_a_epi16(psi_r_m1_p3,a_r_m1_p3,psi_i_m1_p3,a_i_m1_p3,psi_a_m1_p3); - prodsum_psi_a_epi16(psi_r_m3_p1,a_r_m3_p1,psi_i_m3_p1,a_i_m3_p1,psi_a_m3_p1); - prodsum_psi_a_epi16(psi_r_m3_p3,a_r_m3_p3,psi_i_m3_p3,a_i_m3_p3,psi_a_m3_p3); - prodsum_psi_a_epi16(psi_r_m1_m1,a_r_m1_m1,psi_i_m1_m1,a_i_m1_m1,psi_a_m1_m1); - prodsum_psi_a_epi16(psi_r_m1_m3,a_r_m1_m3,psi_i_m1_m3,a_i_m1_m3,psi_a_m1_m3); - prodsum_psi_a_epi16(psi_r_m3_m1,a_r_m3_m1,psi_i_m3_m1,a_i_m3_m1,psi_a_m3_m1); - prodsum_psi_a_epi16(psi_r_m3_m3,a_r_m3_m3,psi_i_m3_m3,a_i_m3_m3,psi_a_m3_m3); - - - // squared interference magnitude times int. ch. power - square_a_epi16(a_r_p1_p1,a_i_p1_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_p1); - square_a_epi16(a_r_p1_p3,a_i_p1_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_p3); - square_a_epi16(a_r_p3_p1,a_i_p3_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_p1); - square_a_epi16(a_r_p3_p3,a_i_p3_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_p3); - square_a_epi16(a_r_p1_m1,a_i_p1_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_m1); - square_a_epi16(a_r_p1_m3,a_i_p1_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p1_m3); - square_a_epi16(a_r_p3_m1,a_i_p3_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_m1); - square_a_epi16(a_r_p3_m3,a_i_p3_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_p3_m3); - square_a_epi16(a_r_m1_p1,a_i_m1_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_p1); - square_a_epi16(a_r_m1_p3,a_i_m1_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_p3); - square_a_epi16(a_r_m3_p1,a_i_m3_p1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_p1); - square_a_epi16(a_r_m3_p3,a_i_m3_p3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_p3); - square_a_epi16(a_r_m1_m1,a_i_m1_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_m1); - square_a_epi16(a_r_m1_m3,a_i_m1_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m1_m3); - square_a_epi16(a_r_m3_m1,a_i_m3_m1,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_m1); - square_a_epi16(a_r_m3_m3,a_i_m3_m3,ch_mag_int,SQRT_10_OVER_FOUR,a_sq_m3_m3); - - // Computing different multiples of channel norms - ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10); - ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR); - ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1); - ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10); - ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2); - - // Computing Metrics - xmm0 = _mm_subs_epi16(psi_a_p1_p1,a_sq_p1_p1); - xmm1 = _mm_adds_epi16(xmm0,y0_p_1_1); - bit_met_p1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10); - - xmm0 = _mm_subs_epi16(psi_a_p1_p3,a_sq_p1_p3); - xmm1 = _mm_adds_epi16(xmm0,y0_p_1_3); - bit_met_p1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_p1_m1,a_sq_p1_m1); - xmm1 = _mm_adds_epi16(xmm0,y0_m_1_1); - bit_met_p1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10); - - xmm0 = _mm_subs_epi16(psi_a_p1_m3,a_sq_p1_m3); - xmm1 = _mm_adds_epi16(xmm0,y0_m_1_3); - bit_met_p1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_p3_p1,a_sq_p3_p1); - xmm1 = _mm_adds_epi16(xmm0,y0_p_3_1); - bit_met_p3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_p3_p3,a_sq_p3_p3); - xmm1 = _mm_adds_epi16(xmm0,y0_p_3_3); - bit_met_p3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); - - xmm0 = _mm_subs_epi16(psi_a_p3_m1,a_sq_p3_m1); - xmm1 = _mm_adds_epi16(xmm0,y0_m_3_1); - bit_met_p3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_p3_m3,a_sq_p3_m3); - xmm1 = _mm_adds_epi16(xmm0,y0_m_3_3); - bit_met_p3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); - - xmm0 = _mm_subs_epi16(psi_a_m1_p1,a_sq_m1_p1); - xmm1 = _mm_subs_epi16(xmm0,y0_m_1_1); - bit_met_m1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10); - - xmm0 = _mm_subs_epi16(psi_a_m1_p3,a_sq_m1_p3); - xmm1 = _mm_subs_epi16(xmm0,y0_m_1_3); - bit_met_m1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_m1_m1,a_sq_m1_m1); - xmm1 = _mm_subs_epi16(xmm0,y0_p_1_1); - bit_met_m1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10); - - xmm0 = _mm_subs_epi16(psi_a_m1_m3,a_sq_m1_m3); - xmm1 = _mm_subs_epi16(xmm0,y0_p_1_3); - bit_met_m1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_m3_p1,a_sq_m3_p1); - xmm1 = _mm_subs_epi16(xmm0,y0_m_3_1); - bit_met_m3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_m3_p3,a_sq_m3_p3); - xmm1 = _mm_subs_epi16(xmm0,y0_m_3_3); - bit_met_m3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); - - xmm0 = _mm_subs_epi16(psi_a_m3_m1,a_sq_m3_m1); - xmm1 = _mm_subs_epi16(xmm0,y0_p_3_1); - bit_met_m3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_m3_m3,a_sq_m3_m3); - xmm1 = _mm_subs_epi16(xmm0,y0_p_3_3); - bit_met_m3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); - - // LLR of the first bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); - xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_re0= _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); - xmm1 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4,xmm5); - - // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)] - y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0); - - // LLR of the second bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); - xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_re1 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); - xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); - xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_re1 = _mm_max_epi16(xmm4,xmm5); - - // LLR of second bit [L2(1), L2(2), L2(3), L2(4)] - y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1); - - // LLR of the third bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); - xmm1 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); - xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_im0 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); - xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_im0 = _mm_max_epi16(xmm4,xmm5); - - // LLR of third bit [L3(1), L3(2), L3(3), L3(4)] - y0i = _mm_subs_epi16(logmax_den_im0,logmax_num_im0); - - // LLR of the fourth bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); - xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); - xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_im1 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); - xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_im1 = _mm_max_epi16(xmm4,xmm5); - - // LLR of fourth bit [L4(1), L4(2), L4(3), L4(4)] - y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1); - - // Pack LLRs in output - // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)] - xmm0 = _mm_unpacklo_epi16(y0r,y1r); - // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)] - xmm1 = _mm_unpackhi_epi16(y0r,y1r); - // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)] - xmm2 = _mm_unpacklo_epi16(y0i,y1i); - // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)] - xmm3 = _mm_unpackhi_epi16(y0i,y1i); - - stream0_128i_out[2*i+0] = _mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs - stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2); - stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3); - stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3); -#elif defined(__arm__) || defined(__aarch64__) - -#endif - - } - -#if defined(__x86_64__) || defined(__i386__) - _mm_empty(); - _m_empty(); -#endif -} - -void nr_qam16_qam64(int16_t *stream0_in, - int16_t *stream1_in, - int16_t *ch_mag, - int16_t *ch_mag_i, - int16_t *stream0_out, - int16_t *rho01, - int32_t length - ) -{ - - /* - Author: Sebastian Wagner - Date: 2012-06-04 - - Input: - stream0_in: MF filter for 1st stream, i.e., y0=h0'*y - stream!_in: MF filter for 2nd stream, i.e., y1=h1'*y - ch_mag: 2*h0/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - ch_mag_i: 2*h1/sqrt(00), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - rho01: Channel cross correlation, i.e., h1'*h0 - - Output: - stream0_out: output LLRs for 1st stream - */ - -#if defined(__x86_64__) || defined(__i386__) - __m128i *rho01_128i = (__m128i *)rho01; - __m128i *stream0_128i_in = (__m128i *)stream0_in; - __m128i *stream1_128i_in = (__m128i *)stream1_in; - __m128i *stream0_128i_out = (__m128i *)stream0_out; - __m128i *ch_mag_128i = (__m128i *)ch_mag; - __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; - - - __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) - __m128i ONE_OVER_SQRT_10 = _mm_set1_epi16(20724); // round(1/sqrt(10)*2^16) - __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) - __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) - __m128i ONE_OVER_TWO_SQRT_10 = _mm_set1_epi16(10362); // round(1/2/sqrt(10)*2^16) - __m128i NINE_OVER_TWO_SQRT_10 = _mm_set1_epi16(23315); // round(9/2/sqrt(10)*2^14) - __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15) - __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15) - __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15) - __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15) - __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3. - __m128i ch_mag_des,ch_mag_int; - __m128i y0r_over_sqrt10; - __m128i y0i_over_sqrt10; - __m128i y0r_three_over_sqrt10; - __m128i y0i_three_over_sqrt10; - __m128i ch_mag_over_10; - __m128i ch_mag_over_2; - __m128i ch_mag_9_over_10; - __m128i ch_mag_int_with_sigma2; - __m128i two_ch_mag_int_with_sigma2; - __m128i three_ch_mag_int_with_sigma2; - -#elif defined(__arm__) || defined(__aarch64__) - -#endif - int i; - - for (i=0; i<length>>2; i+=2) { - // In one iteration, we deal with 8 REs - -#if defined(__x86_64__) || defined(__i386__) - // Get rho - xmm0 = rho01_128i[i]; - xmm1 = rho01_128i[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) - - // Compute the different rhos - rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi,ONE_OVER_SQRT_10); - rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi,ONE_OVER_SQRT_10); - rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi,THREE_OVER_SQRT_10); - rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi,THREE_OVER_SQRT_10); - rho_rpi_3_3 = _mm_slli_epi16(rho_rpi_3_3,1); - rho_rmi_3_3 = _mm_slli_epi16(rho_rmi_3_3,1); - - xmm4 = _mm_mulhi_epi16(xmm2,ONE_OVER_SQRT_10); // Re(rho) - xmm5 = _mm_mulhi_epi16(xmm3,THREE_OVER_SQRT_10); // Im(rho) - xmm5 = _mm_slli_epi16(xmm5,1); - - rho_rpi_1_3 = _mm_adds_epi16(xmm4,xmm5); - rho_rmi_1_3 = _mm_subs_epi16(xmm4,xmm5); - - xmm6 = _mm_mulhi_epi16(xmm2,THREE_OVER_SQRT_10); // Re(rho) - xmm7 = _mm_mulhi_epi16(xmm3,ONE_OVER_SQRT_10); // Im(rho) - xmm6 = _mm_slli_epi16(xmm6,1); - - rho_rpi_3_1 = _mm_adds_epi16(xmm6,xmm7); - rho_rmi_3_1 = _mm_subs_epi16(xmm6,xmm7); - - // Rearrange interfering MF output - xmm0 = stream1_128i_in[i]; - xmm1 = stream1_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - - xmm0 = _mm_setzero_si128(); // ZERO - xmm2 = _mm_subs_epi16(rho_rpi_1_1,y1r); // = [Re(rho)+ Im(rho)]/sqrt(10) - y1r - psi_r_p1_p1 = _mm_abs_epi16(xmm2); // = |[Re(rho)+ Im(rho)]/sqrt(10) - y1r| - - xmm2= _mm_subs_epi16(rho_rmi_1_1,y1r); - psi_r_p1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_1,y1i); - psi_i_p1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_3,y1r); - psi_r_p1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_3,y1r); - psi_r_p1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_1,y1i); - psi_i_p1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_1,y1r); - psi_r_p3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_1,y1r); - psi_r_p3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_1_3,y1i); - psi_i_p3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_3,y1r); - psi_r_p3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_3,y1r); - psi_r_p3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rmi_3_3,y1i); - psi_i_p3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_1,y1i); - psi_i_m1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_1,y1i); - psi_i_m1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_1_3,y1i); - psi_i_m3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_subs_epi16(rho_rpi_3_3,y1i); - psi_i_m3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_1,y1i); - psi_i_p1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_1,y1i); - psi_i_p1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_3,y1i); - psi_i_p3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_3,y1i); - psi_i_p3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_1,y1r); - psi_r_m1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_1_3,y1r); - psi_r_m1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_1,y1r); - psi_r_m3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(rho_rpi_3_3,y1r); - psi_r_m3_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_1_1); - psi_r_m1_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_1_3); - psi_r_m1_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_1_1); - psi_i_m1_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_3_1); - psi_i_m1_m3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_3_1); - psi_r_m3_p1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1r,rho_rmi_3_3); - psi_r_m3_p3 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_1_3); - psi_i_m3_m1 = _mm_abs_epi16(xmm2); - xmm2= _mm_adds_epi16(y1i,rho_rmi_3_3); - psi_i_m3_m3 = _mm_abs_epi16(xmm2); - - // Rearrange desired MF output - xmm0 = stream0_128i_in[i]; - xmm1 = stream0_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); - - // Rearrange desired channel magnitudes - xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) - xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - - ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); // = [|h|^2(1),|h|^2(2),|h|^2(3),|h|^2(4)]*(2/sqrt(10)) - - // Rearrange interfering channel magnitudes - xmm2 = ch_mag_128i_i[i]; - xmm3 = ch_mag_128i_i[i+1]; - - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - - ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); - - // Scale MF output of desired signal - y0r_over_sqrt10 = _mm_mulhi_epi16(y0r,ONE_OVER_SQRT_10); - y0i_over_sqrt10 = _mm_mulhi_epi16(y0i,ONE_OVER_SQRT_10); - y0r_three_over_sqrt10 = _mm_mulhi_epi16(y0r,THREE_OVER_SQRT_10); - y0i_three_over_sqrt10 = _mm_mulhi_epi16(y0i,THREE_OVER_SQRT_10); - y0r_three_over_sqrt10 = _mm_slli_epi16(y0r_three_over_sqrt10,1); - y0i_three_over_sqrt10 = _mm_slli_epi16(y0i_three_over_sqrt10,1); - - // Compute necessary combination of required terms - y0_p_1_1 = _mm_adds_epi16(y0r_over_sqrt10,y0i_over_sqrt10); - y0_m_1_1 = _mm_subs_epi16(y0r_over_sqrt10,y0i_over_sqrt10); - - y0_p_1_3 = _mm_adds_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); - y0_m_1_3 = _mm_subs_epi16(y0r_over_sqrt10,y0i_three_over_sqrt10); - - y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); - y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_over_sqrt10); - - y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); - y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt10,y0i_three_over_sqrt10); - - // Compute optimal interfering symbol magnitude - ch_mag_int_with_sigma2 = _mm_srai_epi16(ch_mag_int, 1); // *2 - two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 - three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 - - interference_abs_64qam_epi16(psi_r_p1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_p1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_p3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_m1 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_m3 ,ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3,ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42,FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - - // Calculation of groups of two terms in the bit metric involving product of psi and interference magnitude - prodsum_psi_a_epi16(psi_r_p1_p1,a_r_p1_p1,psi_i_p1_p1,a_i_p1_p1,psi_a_p1_p1); - prodsum_psi_a_epi16(psi_r_p1_p3,a_r_p1_p3,psi_i_p1_p3,a_i_p1_p3,psi_a_p1_p3); - prodsum_psi_a_epi16(psi_r_p3_p1,a_r_p3_p1,psi_i_p3_p1,a_i_p3_p1,psi_a_p3_p1); - prodsum_psi_a_epi16(psi_r_p3_p3,a_r_p3_p3,psi_i_p3_p3,a_i_p3_p3,psi_a_p3_p3); - prodsum_psi_a_epi16(psi_r_p1_m1,a_r_p1_m1,psi_i_p1_m1,a_i_p1_m1,psi_a_p1_m1); - prodsum_psi_a_epi16(psi_r_p1_m3,a_r_p1_m3,psi_i_p1_m3,a_i_p1_m3,psi_a_p1_m3); - prodsum_psi_a_epi16(psi_r_p3_m1,a_r_p3_m1,psi_i_p3_m1,a_i_p3_m1,psi_a_p3_m1); - prodsum_psi_a_epi16(psi_r_p3_m3,a_r_p3_m3,psi_i_p3_m3,a_i_p3_m3,psi_a_p3_m3); - prodsum_psi_a_epi16(psi_r_m1_p1,a_r_m1_p1,psi_i_m1_p1,a_i_m1_p1,psi_a_m1_p1); - prodsum_psi_a_epi16(psi_r_m1_p3,a_r_m1_p3,psi_i_m1_p3,a_i_m1_p3,psi_a_m1_p3); - prodsum_psi_a_epi16(psi_r_m3_p1,a_r_m3_p1,psi_i_m3_p1,a_i_m3_p1,psi_a_m3_p1); - prodsum_psi_a_epi16(psi_r_m3_p3,a_r_m3_p3,psi_i_m3_p3,a_i_m3_p3,psi_a_m3_p3); - prodsum_psi_a_epi16(psi_r_m1_m1,a_r_m1_m1,psi_i_m1_m1,a_i_m1_m1,psi_a_m1_m1); - prodsum_psi_a_epi16(psi_r_m1_m3,a_r_m1_m3,psi_i_m1_m3,a_i_m1_m3,psi_a_m1_m3); - prodsum_psi_a_epi16(psi_r_m3_m1,a_r_m3_m1,psi_i_m3_m1,a_i_m3_m1,psi_a_m3_m1); - prodsum_psi_a_epi16(psi_r_m3_m3,a_r_m3_m3,psi_i_m3_m3,a_i_m3_m3,psi_a_m3_m3); - - // Multiply by sqrt(2) - psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); - psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2); - psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2); - psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3, 2); - psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2); - psi_a_p3_p1 = _mm_slli_epi16(psi_a_p3_p1, 2); - psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2); - psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3, 2); - psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); - psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2); - psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2); - psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3, 2); - psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2); - psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1, 2); - psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2); - psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3, 2); - psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); - psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2); - psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2); - psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3, 2); - psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2); - psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1, 2); - psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2); - psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3, 2); - psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); - psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2); - psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2); - psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3, 2); - psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2); - psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1, 2); - psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2); - psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3, 2); - - // squared interference magnitude times int. ch. power - square_a_64qam_epi16(a_r_p1_p1,a_i_p1_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_p1); - square_a_64qam_epi16(a_r_p1_p3,a_i_p1_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_p3); - square_a_64qam_epi16(a_r_p3_p1,a_i_p3_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_p1); - square_a_64qam_epi16(a_r_p3_p3,a_i_p3_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_p3); - square_a_64qam_epi16(a_r_p1_m1,a_i_p1_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_m1); - square_a_64qam_epi16(a_r_p1_m3,a_i_p1_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p1_m3); - square_a_64qam_epi16(a_r_p3_m1,a_i_p3_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_m1); - square_a_64qam_epi16(a_r_p3_m3,a_i_p3_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_p3_m3); - square_a_64qam_epi16(a_r_m1_p1,a_i_m1_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_p1); - square_a_64qam_epi16(a_r_m1_p3,a_i_m1_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_p3); - square_a_64qam_epi16(a_r_m3_p1,a_i_m3_p1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_p1); - square_a_64qam_epi16(a_r_m3_p3,a_i_m3_p3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_p3); - square_a_64qam_epi16(a_r_m1_m1,a_i_m1_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_m1); - square_a_64qam_epi16(a_r_m1_m3,a_i_m1_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m1_m3); - square_a_64qam_epi16(a_r_m3_m1,a_i_m3_m1,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_m1); - square_a_64qam_epi16(a_r_m3_m3,a_i_m3_m3,ch_mag_int,SQRT_42_OVER_FOUR,a_sq_m3_m3); - - // Computing different multiples of channel norms - ch_mag_over_10=_mm_mulhi_epi16(ch_mag_des, ONE_OVER_TWO_SQRT_10); - ch_mag_over_2=_mm_mulhi_epi16(ch_mag_des, SQRT_10_OVER_FOUR); - ch_mag_over_2=_mm_slli_epi16(ch_mag_over_2, 1); - ch_mag_9_over_10=_mm_mulhi_epi16(ch_mag_des, NINE_OVER_TWO_SQRT_10); - ch_mag_9_over_10=_mm_slli_epi16(ch_mag_9_over_10, 2); - - // Computing Metrics - xmm0 = _mm_subs_epi16(psi_a_p1_p1,a_sq_p1_p1); - xmm1 = _mm_adds_epi16(xmm0,y0_p_1_1); - bit_met_p1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10); - - xmm0 = _mm_subs_epi16(psi_a_p1_p3,a_sq_p1_p3); - xmm1 = _mm_adds_epi16(xmm0,y0_p_1_3); - bit_met_p1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_p1_m1,a_sq_p1_m1); - xmm1 = _mm_adds_epi16(xmm0,y0_m_1_1); - bit_met_p1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10); - - xmm0 = _mm_subs_epi16(psi_a_p1_m3,a_sq_p1_m3); - xmm1 = _mm_adds_epi16(xmm0,y0_m_1_3); - bit_met_p1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_p3_p1,a_sq_p3_p1); - xmm1 = _mm_adds_epi16(xmm0,y0_p_3_1); - bit_met_p3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_p3_p3,a_sq_p3_p3); - xmm1 = _mm_adds_epi16(xmm0,y0_p_3_3); - bit_met_p3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); - - xmm0 = _mm_subs_epi16(psi_a_p3_m1,a_sq_p3_m1); - xmm1 = _mm_adds_epi16(xmm0,y0_m_3_1); - bit_met_p3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_p3_m3,a_sq_p3_m3); - xmm1 = _mm_adds_epi16(xmm0,y0_m_3_3); - bit_met_p3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); - - xmm0 = _mm_subs_epi16(psi_a_m1_p1,a_sq_m1_p1); - xmm1 = _mm_subs_epi16(xmm0,y0_m_1_1); - bit_met_m1_p1= _mm_subs_epi16(xmm1,ch_mag_over_10); - - xmm0 = _mm_subs_epi16(psi_a_m1_p3,a_sq_m1_p3); - xmm1 = _mm_subs_epi16(xmm0,y0_m_1_3); - bit_met_m1_p3= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_m1_m1,a_sq_m1_m1); - xmm1 = _mm_subs_epi16(xmm0,y0_p_1_1); - bit_met_m1_m1= _mm_subs_epi16(xmm1,ch_mag_over_10); - - xmm0 = _mm_subs_epi16(psi_a_m1_m3,a_sq_m1_m3); - xmm1 = _mm_subs_epi16(xmm0,y0_p_1_3); - bit_met_m1_m3= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_m3_p1,a_sq_m3_p1); - xmm1 = _mm_subs_epi16(xmm0,y0_m_3_1); - bit_met_m3_p1= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_m3_p3,a_sq_m3_p3); - xmm1 = _mm_subs_epi16(xmm0,y0_m_3_3); - bit_met_m3_p3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); - - xmm0 = _mm_subs_epi16(psi_a_m3_m1,a_sq_m3_m1); - xmm1 = _mm_subs_epi16(xmm0,y0_p_3_1); - bit_met_m3_m1= _mm_subs_epi16(xmm1,ch_mag_over_2); - - xmm0 = _mm_subs_epi16(psi_a_m3_m3,a_sq_m3_m3); - xmm1 = _mm_subs_epi16(xmm0,y0_p_3_3); - bit_met_m3_m3= _mm_subs_epi16(xmm1,ch_mag_9_over_10); - - // LLR of the first bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); - xmm2 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_re0= _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); - xmm1 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4,xmm5); - - // LLR of first bit [L1(1), L1(2), L1(3), L1(4), L1(5), L1(6), L1(7), L1(8)] - y0r = _mm_subs_epi16(logmax_den_re0,logmax_num_re0); - - // LLR of the second bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); - xmm3 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_re1 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); - xmm1 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); - xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_re1 = _mm_max_epi16(xmm4,xmm5); - - // LLR of second bit [L2(1), L2(2), L2(3), L2(4)] - y1r = _mm_subs_epi16(logmax_den_re1,logmax_num_re1); - - // LLR of the third bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_m3_p1,bit_met_m3_p3); - xmm1 = _mm_max_epi16(bit_met_m3_m1,bit_met_m3_m3); - xmm2 = _mm_max_epi16(bit_met_p3_p1,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_m1,bit_met_p3_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_im0 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_m1_p1,bit_met_m1_p3); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m1_m3); - xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_m1,bit_met_p1_m3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_im0 = _mm_max_epi16(xmm4,xmm5); - - // LLR of third bit [L3(1), L3(2), L3(3), L3(4)] - y0i = _mm_subs_epi16(logmax_den_im0,logmax_num_im0); - - // LLR of the fourth bit - // Bit = 1 - xmm0 = _mm_max_epi16(bit_met_p1_m3,bit_met_p3_m3); - xmm1 = _mm_max_epi16(bit_met_m1_m3,bit_met_m3_m3); - xmm2 = _mm_max_epi16(bit_met_p1_p3,bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p3,bit_met_m3_p3); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_num_im1 = _mm_max_epi16(xmm4,xmm5); - - // Bit = 0 - xmm0 = _mm_max_epi16(bit_met_p1_m1,bit_met_p3_m1); - xmm1 = _mm_max_epi16(bit_met_m1_m1,bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1,bit_met_p3_p1); - xmm3 = _mm_max_epi16(bit_met_m1_p1,bit_met_m3_p1); - xmm4 = _mm_max_epi16(xmm0,xmm1); - xmm5 = _mm_max_epi16(xmm2,xmm3); - logmax_den_im1 = _mm_max_epi16(xmm4,xmm5); - - // LLR of fourth bit [L4(1), L4(2), L4(3), L4(4)] - y1i = _mm_subs_epi16(logmax_den_im1,logmax_num_im1); - - // Pack LLRs in output - // [L1(1), L2(1), L1(2), L2(2), L1(3), L2(3), L1(4), L2(4)] - xmm0 = _mm_unpacklo_epi16(y0r,y1r); - // [L1(5), L2(5), L1(6), L2(6), L1(7), L2(7), L1(8), L2(8)] - xmm1 = _mm_unpackhi_epi16(y0r,y1r); - // [L3(1), L4(1), L3(2), L4(2), L3(3), L4(3), L3(4), L4(4)] - xmm2 = _mm_unpacklo_epi16(y0i,y1i); - // [L3(5), L4(5), L3(6), L4(6), L3(7), L4(7), L3(8), L4(8)] - xmm3 = _mm_unpackhi_epi16(y0i,y1i); - - stream0_128i_out[2*i+0] = _mm_unpacklo_epi32(xmm0,xmm2); // 8LLRs, 2REs - stream0_128i_out[2*i+1] = _mm_unpackhi_epi32(xmm0,xmm2); - stream0_128i_out[2*i+2] = _mm_unpacklo_epi32(xmm1,xmm3); - stream0_128i_out[2*i+3] = _mm_unpackhi_epi32(xmm1,xmm3); -#elif defined(__arm__) || defined(__aarch64__) - -#endif - - } - -#if defined(__x86_64__) || defined(__i386__) - _mm_empty(); - _m_empty(); -#endif -} - -//---------------------------------------------------------------------------------------------- -// 64-QAM -//---------------------------------------------------------------------------------------------- - -/* -__m128i ONE_OVER_SQRT_42 __attribute__((aligned(16))); -__m128i THREE_OVER_SQRT_42 __attribute__((aligned(16))); -__m128i FIVE_OVER_SQRT_42 __attribute__((aligned(16))); -__m128i SEVEN_OVER_SQRT_42 __attribute__((aligned(16))); - -__m128i FORTYNINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); -__m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); -__m128i TWENTYNINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); -__m128i TWENTYFIVE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); -__m128i SEVENTEEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); -__m128i NINE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); -__m128i THIRTEEN_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); -__m128i FIVE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); -__m128i ONE_OVER_FOUR_SQRT_42 __attribute__((aligned(16))); - -__m128i y0r_one_over_sqrt_21 __attribute__((aligned(16))); -__m128i y0r_three_over_sqrt_21 __attribute__((aligned(16))); -__m128i y0r_five_over_sqrt_21 __attribute__((aligned(16))); -__m128i y0r_seven_over_sqrt_21 __attribute__((aligned(16))); -__m128i y0i_one_over_sqrt_21 __attribute__((aligned(16))); -__m128i y0i_three_over_sqrt_21 __attribute__((aligned(16))); -__m128i y0i_five_over_sqrt_21 __attribute__((aligned(16))); -__m128i y0i_seven_over_sqrt_21 __attribute__((aligned(16))); - -__m128i ch_mag_98_over_42_with_sigma2 __attribute__((aligned(16))); -__m128i ch_mag_74_over_42_with_sigma2 __attribute__((aligned(16))); -__m128i ch_mag_58_over_42_with_sigma2 __attribute__((aligned(16))); -__m128i ch_mag_50_over_42_with_sigma2 __attribute__((aligned(16))); -__m128i ch_mag_34_over_42_with_sigma2 __attribute__((aligned(16))); -__m128i ch_mag_18_over_42_with_sigma2 __attribute__((aligned(16))); -__m128i ch_mag_26_over_42_with_sigma2 __attribute__((aligned(16))); -__m128i ch_mag_10_over_42_with_sigma2 __attribute__((aligned(16))); -__m128i ch_mag_2_over_42_with_sigma2 __attribute__((aligned(16))); - -*/ - -void nr_qam64_qpsk(int16_t *stream0_in, - int16_t *stream1_in, - int16_t *ch_mag, - int16_t *stream0_out, - int16_t *rho01, - int32_t length - ) -{ - - /* - Author: S. Wagner - Date: 31-07-12 - - Input: - stream0_in: MF filter for 1st stream, i.e., y0=h0'*y - stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y - ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - rho01: Channel cross correlation, i.e., h1'*h0 - - Output: - stream0_out: output LLRs for 1st stream - */ - -#if defined(__x86_64__) || defined(__i386__) - __m128i *rho01_128i = (__m128i *)rho01; - __m128i *stream0_128i_in = (__m128i *)stream0_in; - __m128i *stream1_128i_in = (__m128i *)stream1_in; - __m128i *ch_mag_128i = (__m128i *)ch_mag; - - - __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16) - __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16) - __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15) - __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(5/sqrt(42)*2^15) - __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) - __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14 - __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14 - __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15) - __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14 - __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15) - __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15) - __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15) - __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15) - __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15) - - - __m128i ch_mag_des; - __m128i ch_mag_98_over_42_with_sigma2; - __m128i ch_mag_74_over_42_with_sigma2; - __m128i ch_mag_58_over_42_with_sigma2; - __m128i ch_mag_50_over_42_with_sigma2; - __m128i ch_mag_34_over_42_with_sigma2; - __m128i ch_mag_18_over_42_with_sigma2; - __m128i ch_mag_26_over_42_with_sigma2; - __m128i ch_mag_10_over_42_with_sigma2; - __m128i ch_mag_2_over_42_with_sigma2; - __m128i y0r_one_over_sqrt_21; - __m128i y0r_three_over_sqrt_21; - __m128i y0r_five_over_sqrt_21; - __m128i y0r_seven_over_sqrt_21; - __m128i y0i_one_over_sqrt_21; - __m128i y0i_three_over_sqrt_21; - __m128i y0i_five_over_sqrt_21; - __m128i y0i_seven_over_sqrt_21; -#elif defined(__arm__) || defined(__aarch64__) - -#endif - - int i,j; - - for (i=0; i<length>>2; i+=2) { - -#if defined(__x86_64) || defined(__i386__) - // Get rho - xmm0 = rho01_128i[i]; - xmm1 = rho01_128i[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) - - // Compute the different rhos - rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); - rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); - rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); - rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); - rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); - rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); - rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); - rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); - - rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1); - rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1); - rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2); - rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2); - - xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); - xmm5 = _mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); - xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); - xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); - xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); - xmm7 = _mm_slli_epi16(xmm7, 1); - xmm8 = _mm_slli_epi16(xmm8, 2); - - rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7); - rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); - rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7); - rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); - xmm4 = _mm_slli_epi16(xmm4, 1); - rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); - xmm4 = _mm_slli_epi16(xmm4, 2); - rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_7_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7); - - // Rearrange interfering MF output - xmm0 = stream1_128i_in[i]; - xmm1 = stream1_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - - // Psi_r calculation from rho_rpi or rho_rmi - xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16 - xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); - psi_r_p7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); - psi_r_p7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); - psi_r_p7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r); - psi_r_p7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r); - psi_r_p7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r); - psi_r_p7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r); - psi_r_p7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r); - psi_r_p7_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r); - psi_r_p5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r); - psi_r_p5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r); - psi_r_p5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r); - psi_r_p5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r); - psi_r_p5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r); - psi_r_p5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r); - psi_r_p5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r); - psi_r_p5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r); - psi_r_p3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r); - psi_r_p3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r); - psi_r_p3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r); - psi_r_p3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1r); - psi_r_p3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r); - psi_r_p3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r); - psi_r_p3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r); - psi_r_p3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r); - psi_r_p1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r); - psi_r_p1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r); - psi_r_p1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r); - psi_r_p1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r); - psi_r_p1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r); - psi_r_p1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r); - psi_r_p1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r); - psi_r_p1_m7 = _mm_abs_epi16(xmm2); - - xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r); - psi_r_m1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r); - psi_r_m1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r); - psi_r_m1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r); - psi_r_m1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r); - psi_r_m1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r); - psi_r_m1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r); - psi_r_m1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r); - psi_r_m1_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r); - psi_r_m3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r); - psi_r_m3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r); - psi_r_m3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r); - psi_r_m3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r); - psi_r_m3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1r); - psi_r_m3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r); - psi_r_m3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r); - psi_r_m3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r); - psi_r_m5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r); - psi_r_m5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r); - psi_r_m5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r); - psi_r_m5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r); - psi_r_m5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r); - psi_r_m5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r); - psi_r_m5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r); - psi_r_m5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r); - psi_r_m7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r); - psi_r_m7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r); - psi_r_m7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r); - psi_r_m7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r); - psi_r_m7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r); - psi_r_m7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r); - psi_r_m7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r); - psi_r_m7_m7 = _mm_abs_epi16(xmm2); - - // Psi_i calculation from rho_rpi or rho_rmi - xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i); - psi_i_p7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i); - psi_i_p7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i); - psi_i_p7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i); - psi_i_p7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i); - psi_i_p7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1i); - psi_i_p7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i); - psi_i_p7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i); - psi_i_p7_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i); - psi_i_p5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i); - psi_i_p5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i); - psi_i_p5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i); - psi_i_p5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i); - psi_i_p5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i); - psi_i_p5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i); - psi_i_p5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i); - psi_i_p5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i); - psi_i_p3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i); - psi_i_p3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i); - psi_i_p3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i); - psi_i_p3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i); - psi_i_p3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i); - psi_i_p3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i); - psi_i_p3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1i); - psi_i_p3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i); - psi_i_p1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i); - psi_i_p1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i); - psi_i_p1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i); - psi_i_p1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i); - psi_i_p1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i); - psi_i_p1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1i); - psi_i_p1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i); - psi_i_p1_m7 = _mm_abs_epi16(xmm2); - - xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i); - psi_i_m1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i); - psi_i_m1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i); - psi_i_m1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i); - psi_i_m1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i); - psi_i_m1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i); - psi_i_m1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i); - psi_i_m1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i); - psi_i_m1_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i); - psi_i_m3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i); - psi_i_m3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i); - psi_i_m3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i); - psi_i_m3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i); - psi_i_m3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i); - psi_i_m3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i); - psi_i_m3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i); - psi_i_m3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i); - psi_i_m5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i); - psi_i_m5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i); - psi_i_m5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i); - psi_i_m5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i); - psi_i_m5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i); - psi_i_m5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i); - psi_i_m5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1i); - psi_i_m5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i); - psi_i_m7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i); - psi_i_m7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i); - psi_i_m7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i); - psi_i_m7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i); - psi_i_m7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i); - psi_i_m7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i); - psi_i_m7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i); - psi_i_m7_m7 = _mm_abs_epi16(xmm2); - - - // Rearrange desired MF output - xmm0 = stream0_128i_in[i]; - xmm1 = stream0_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); - - // Rearrange desired channel magnitudes - xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) - xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); - - y0r_one_over_sqrt_21 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42); - y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42); - y0r_five_over_sqrt_21 = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); - y0r_five_over_sqrt_21 = _mm_slli_epi16(y0r_five_over_sqrt_21, 1); - y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); - y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 - - y0i_one_over_sqrt_21 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42); - y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42); - y0i_five_over_sqrt_21 = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); - y0i_five_over_sqrt_21 = _mm_slli_epi16(y0i_five_over_sqrt_21, 1); - y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); - y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 - - y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_1_5 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); - - y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_5_5 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_7_7 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); - - // divide by sqrt(2) - psi_r_p7_p7 = _mm_mulhi_epi16(psi_r_p7_p7, ONE_OVER_SQRT_2); - psi_r_p7_p7 = _mm_slli_epi16(psi_r_p7_p7, 1); - psi_r_p7_p5 = _mm_mulhi_epi16(psi_r_p7_p5, ONE_OVER_SQRT_2); - psi_r_p7_p5 = _mm_slli_epi16(psi_r_p7_p5, 1); - psi_r_p7_p3 = _mm_mulhi_epi16(psi_r_p7_p3, ONE_OVER_SQRT_2); - psi_r_p7_p3 = _mm_slli_epi16(psi_r_p7_p3, 1); - psi_r_p7_p1 = _mm_mulhi_epi16(psi_r_p7_p1, ONE_OVER_SQRT_2); - psi_r_p7_p1 = _mm_slli_epi16(psi_r_p7_p1, 1); - psi_r_p7_m1 = _mm_mulhi_epi16(psi_r_p7_m1, ONE_OVER_SQRT_2); - psi_r_p7_m1 = _mm_slli_epi16(psi_r_p7_m1, 1); - psi_r_p7_m3 = _mm_mulhi_epi16(psi_r_p7_m3, ONE_OVER_SQRT_2); - psi_r_p7_m3 = _mm_slli_epi16(psi_r_p7_m3, 1); - psi_r_p7_m5 = _mm_mulhi_epi16(psi_r_p7_m5, ONE_OVER_SQRT_2); - psi_r_p7_m5 = _mm_slli_epi16(psi_r_p7_m5, 1); - psi_r_p7_m7 = _mm_mulhi_epi16(psi_r_p7_m7, ONE_OVER_SQRT_2); - psi_r_p7_m7 = _mm_slli_epi16(psi_r_p7_m7, 1); - psi_r_p5_p7 = _mm_mulhi_epi16(psi_r_p5_p7, ONE_OVER_SQRT_2); - psi_r_p5_p7 = _mm_slli_epi16(psi_r_p5_p7, 1); - psi_r_p5_p5 = _mm_mulhi_epi16(psi_r_p5_p5, ONE_OVER_SQRT_2); - psi_r_p5_p5 = _mm_slli_epi16(psi_r_p5_p5, 1); - psi_r_p5_p3 = _mm_mulhi_epi16(psi_r_p5_p3, ONE_OVER_SQRT_2); - psi_r_p5_p3 = _mm_slli_epi16(psi_r_p5_p3, 1); - psi_r_p5_p1 = _mm_mulhi_epi16(psi_r_p5_p1, ONE_OVER_SQRT_2); - psi_r_p5_p1 = _mm_slli_epi16(psi_r_p5_p1, 1); - psi_r_p5_m1 = _mm_mulhi_epi16(psi_r_p5_m1, ONE_OVER_SQRT_2); - psi_r_p5_m1 = _mm_slli_epi16(psi_r_p5_m1, 1); - psi_r_p5_m3 = _mm_mulhi_epi16(psi_r_p5_m3, ONE_OVER_SQRT_2); - psi_r_p5_m3 = _mm_slli_epi16(psi_r_p5_m3, 1); - psi_r_p5_m5 = _mm_mulhi_epi16(psi_r_p5_m5, ONE_OVER_SQRT_2); - psi_r_p5_m5 = _mm_slli_epi16(psi_r_p5_m5, 1); - psi_r_p5_m7 = _mm_mulhi_epi16(psi_r_p5_m7, ONE_OVER_SQRT_2); - psi_r_p5_m7 = _mm_slli_epi16(psi_r_p5_m7, 1); - psi_r_p3_p7 = _mm_mulhi_epi16(psi_r_p3_p7, ONE_OVER_SQRT_2); - psi_r_p3_p7 = _mm_slli_epi16(psi_r_p3_p7, 1); - psi_r_p3_p5 = _mm_mulhi_epi16(psi_r_p3_p5, ONE_OVER_SQRT_2); - psi_r_p3_p5 = _mm_slli_epi16(psi_r_p3_p5, 1); - psi_r_p3_p3 = _mm_mulhi_epi16(psi_r_p3_p3, ONE_OVER_SQRT_2); - psi_r_p3_p3 = _mm_slli_epi16(psi_r_p3_p3, 1); - psi_r_p3_p1 = _mm_mulhi_epi16(psi_r_p3_p1, ONE_OVER_SQRT_2); - psi_r_p3_p1 = _mm_slli_epi16(psi_r_p3_p1, 1); - psi_r_p3_m1 = _mm_mulhi_epi16(psi_r_p3_m1, ONE_OVER_SQRT_2); - psi_r_p3_m1 = _mm_slli_epi16(psi_r_p3_m1, 1); - psi_r_p3_m3 = _mm_mulhi_epi16(psi_r_p3_m3, ONE_OVER_SQRT_2); - psi_r_p3_m3 = _mm_slli_epi16(psi_r_p3_m3, 1); - psi_r_p3_m5 = _mm_mulhi_epi16(psi_r_p3_m5, ONE_OVER_SQRT_2); - psi_r_p3_m5 = _mm_slli_epi16(psi_r_p3_m5, 1); - psi_r_p3_m7 = _mm_mulhi_epi16(psi_r_p3_m7, ONE_OVER_SQRT_2); - psi_r_p3_m7 = _mm_slli_epi16(psi_r_p3_m7, 1); - psi_r_p1_p7 = _mm_mulhi_epi16(psi_r_p1_p7, ONE_OVER_SQRT_2); - psi_r_p1_p7 = _mm_slli_epi16(psi_r_p1_p7, 1); - psi_r_p1_p5 = _mm_mulhi_epi16(psi_r_p1_p5, ONE_OVER_SQRT_2); - psi_r_p1_p5 = _mm_slli_epi16(psi_r_p1_p5, 1); - psi_r_p1_p3 = _mm_mulhi_epi16(psi_r_p1_p3, ONE_OVER_SQRT_2); - psi_r_p1_p3 = _mm_slli_epi16(psi_r_p1_p3, 1); - psi_r_p1_p1 = _mm_mulhi_epi16(psi_r_p1_p1, ONE_OVER_SQRT_2); - psi_r_p1_p1 = _mm_slli_epi16(psi_r_p1_p1, 1); - psi_r_p1_m1 = _mm_mulhi_epi16(psi_r_p1_m1, ONE_OVER_SQRT_2); - psi_r_p1_m1 = _mm_slli_epi16(psi_r_p1_m1, 1); - psi_r_p1_m3 = _mm_mulhi_epi16(psi_r_p1_m3, ONE_OVER_SQRT_2); - psi_r_p1_m3 = _mm_slli_epi16(psi_r_p1_m3, 1); - psi_r_p1_m5 = _mm_mulhi_epi16(psi_r_p1_m5, ONE_OVER_SQRT_2); - psi_r_p1_m5 = _mm_slli_epi16(psi_r_p1_m5, 1); - psi_r_p1_m7 = _mm_mulhi_epi16(psi_r_p1_m7, ONE_OVER_SQRT_2); - psi_r_p1_m7 = _mm_slli_epi16(psi_r_p1_m7, 1); - psi_r_m1_p7 = _mm_mulhi_epi16(psi_r_m1_p7, ONE_OVER_SQRT_2); - psi_r_m1_p7 = _mm_slli_epi16(psi_r_m1_p7, 1); - psi_r_m1_p5 = _mm_mulhi_epi16(psi_r_m1_p5, ONE_OVER_SQRT_2); - psi_r_m1_p5 = _mm_slli_epi16(psi_r_m1_p5, 1); - psi_r_m1_p3 = _mm_mulhi_epi16(psi_r_m1_p3, ONE_OVER_SQRT_2); - psi_r_m1_p3 = _mm_slli_epi16(psi_r_m1_p3, 1); - psi_r_m1_p1 = _mm_mulhi_epi16(psi_r_m1_p1, ONE_OVER_SQRT_2); - psi_r_m1_p1 = _mm_slli_epi16(psi_r_m1_p1, 1); - psi_r_m1_m1 = _mm_mulhi_epi16(psi_r_m1_m1, ONE_OVER_SQRT_2); - psi_r_m1_m1 = _mm_slli_epi16(psi_r_m1_m1, 1); - psi_r_m1_m3 = _mm_mulhi_epi16(psi_r_m1_m3, ONE_OVER_SQRT_2); - psi_r_m1_m3 = _mm_slli_epi16(psi_r_m1_m3, 1); - psi_r_m1_m5 = _mm_mulhi_epi16(psi_r_m1_m5, ONE_OVER_SQRT_2); - psi_r_m1_m5 = _mm_slli_epi16(psi_r_m1_m5, 1); - psi_r_m1_m7 = _mm_mulhi_epi16(psi_r_m1_m7, ONE_OVER_SQRT_2); - psi_r_m1_m7 = _mm_slli_epi16(psi_r_m1_m7, 1); - psi_r_m3_p7 = _mm_mulhi_epi16(psi_r_m3_p7, ONE_OVER_SQRT_2); - psi_r_m3_p7 = _mm_slli_epi16(psi_r_m3_p7, 1); - psi_r_m3_p5 = _mm_mulhi_epi16(psi_r_m3_p5, ONE_OVER_SQRT_2); - psi_r_m3_p5 = _mm_slli_epi16(psi_r_m3_p5, 1); - psi_r_m3_p3 = _mm_mulhi_epi16(psi_r_m3_p3, ONE_OVER_SQRT_2); - psi_r_m3_p3 = _mm_slli_epi16(psi_r_m3_p3, 1); - psi_r_m3_p1 = _mm_mulhi_epi16(psi_r_m3_p1, ONE_OVER_SQRT_2); - psi_r_m3_p1 = _mm_slli_epi16(psi_r_m3_p1, 1); - psi_r_m3_m1 = _mm_mulhi_epi16(psi_r_m3_m1, ONE_OVER_SQRT_2); - psi_r_m3_m1 = _mm_slli_epi16(psi_r_m3_m1, 1); - psi_r_m3_m3 = _mm_mulhi_epi16(psi_r_m3_m3, ONE_OVER_SQRT_2); - psi_r_m3_m3 = _mm_slli_epi16(psi_r_m3_m3, 1); - psi_r_m3_m5 = _mm_mulhi_epi16(psi_r_m3_m5, ONE_OVER_SQRT_2); - psi_r_m3_m5 = _mm_slli_epi16(psi_r_m3_m5, 1); - psi_r_m3_m7 = _mm_mulhi_epi16(psi_r_m3_m7, ONE_OVER_SQRT_2); - psi_r_m3_m7 = _mm_slli_epi16(psi_r_m3_m7, 1); - psi_r_m5_p7 = _mm_mulhi_epi16(psi_r_m5_p7, ONE_OVER_SQRT_2); - psi_r_m5_p7 = _mm_slli_epi16(psi_r_m5_p7, 1); - psi_r_m5_p5 = _mm_mulhi_epi16(psi_r_m5_p5, ONE_OVER_SQRT_2); - psi_r_m5_p5 = _mm_slli_epi16(psi_r_m5_p5, 1); - psi_r_m5_p3 = _mm_mulhi_epi16(psi_r_m5_p3, ONE_OVER_SQRT_2); - psi_r_m5_p3 = _mm_slli_epi16(psi_r_m5_p3, 1); - psi_r_m5_p1 = _mm_mulhi_epi16(psi_r_m5_p1, ONE_OVER_SQRT_2); - psi_r_m5_p1 = _mm_slli_epi16(psi_r_m5_p1, 1); - psi_r_m5_m1 = _mm_mulhi_epi16(psi_r_m5_m1, ONE_OVER_SQRT_2); - psi_r_m5_m1 = _mm_slli_epi16(psi_r_m5_m1, 1); - psi_r_m5_m3 = _mm_mulhi_epi16(psi_r_m5_m3, ONE_OVER_SQRT_2); - psi_r_m5_m3 = _mm_slli_epi16(psi_r_m5_m3, 1); - psi_r_m5_m5 = _mm_mulhi_epi16(psi_r_m5_m5, ONE_OVER_SQRT_2); - psi_r_m5_m5 = _mm_slli_epi16(psi_r_m5_m5, 1); - psi_r_m5_m7 = _mm_mulhi_epi16(psi_r_m5_m7, ONE_OVER_SQRT_2); - psi_r_m5_m7 = _mm_slli_epi16(psi_r_m5_m7, 1); - psi_r_m7_p7 = _mm_mulhi_epi16(psi_r_m7_p7, ONE_OVER_SQRT_2); - psi_r_m7_p7 = _mm_slli_epi16(psi_r_m7_p7, 1); - psi_r_m7_p5 = _mm_mulhi_epi16(psi_r_m7_p5, ONE_OVER_SQRT_2); - psi_r_m7_p5 = _mm_slli_epi16(psi_r_m7_p5, 1); - psi_r_m7_p3 = _mm_mulhi_epi16(psi_r_m7_p3, ONE_OVER_SQRT_2); - psi_r_m7_p3 = _mm_slli_epi16(psi_r_m7_p3, 1); - psi_r_m7_p1 = _mm_mulhi_epi16(psi_r_m7_p1, ONE_OVER_SQRT_2); - psi_r_m7_p1 = _mm_slli_epi16(psi_r_m7_p1, 1); - psi_r_m7_m1 = _mm_mulhi_epi16(psi_r_m7_m1, ONE_OVER_SQRT_2); - psi_r_m7_m1 = _mm_slli_epi16(psi_r_m7_m1, 1); - psi_r_m7_m3 = _mm_mulhi_epi16(psi_r_m7_m3, ONE_OVER_SQRT_2); - psi_r_m7_m3 = _mm_slli_epi16(psi_r_m7_m3, 1); - psi_r_m7_m5 = _mm_mulhi_epi16(psi_r_m7_m5, ONE_OVER_SQRT_2); - psi_r_m7_m5 = _mm_slli_epi16(psi_r_m7_m5, 1); - psi_r_m7_m7 = _mm_mulhi_epi16(psi_r_m7_m7, ONE_OVER_SQRT_2); - psi_r_m7_m7 = _mm_slli_epi16(psi_r_m7_m7, 1); - - psi_i_p7_p7 = _mm_mulhi_epi16(psi_i_p7_p7, ONE_OVER_SQRT_2); - psi_i_p7_p7 = _mm_slli_epi16(psi_i_p7_p7, 1); - psi_i_p7_p5 = _mm_mulhi_epi16(psi_i_p7_p5, ONE_OVER_SQRT_2); - psi_i_p7_p5 = _mm_slli_epi16(psi_i_p7_p5, 1); - psi_i_p7_p3 = _mm_mulhi_epi16(psi_i_p7_p3, ONE_OVER_SQRT_2); - psi_i_p7_p3 = _mm_slli_epi16(psi_i_p7_p3, 1); - psi_i_p7_p1 = _mm_mulhi_epi16(psi_i_p7_p1, ONE_OVER_SQRT_2); - psi_i_p7_p1 = _mm_slli_epi16(psi_i_p7_p1, 1); - psi_i_p7_m1 = _mm_mulhi_epi16(psi_i_p7_m1, ONE_OVER_SQRT_2); - psi_i_p7_m1 = _mm_slli_epi16(psi_i_p7_m1, 1); - psi_i_p7_m3 = _mm_mulhi_epi16(psi_i_p7_m3, ONE_OVER_SQRT_2); - psi_i_p7_m3 = _mm_slli_epi16(psi_i_p7_m3, 1); - psi_i_p7_m5 = _mm_mulhi_epi16(psi_i_p7_m5, ONE_OVER_SQRT_2); - psi_i_p7_m5 = _mm_slli_epi16(psi_i_p7_m5, 1); - psi_i_p7_m7 = _mm_mulhi_epi16(psi_i_p7_m7, ONE_OVER_SQRT_2); - psi_i_p7_m7 = _mm_slli_epi16(psi_i_p7_m7, 1); - psi_i_p5_p7 = _mm_mulhi_epi16(psi_i_p5_p7, ONE_OVER_SQRT_2); - psi_i_p5_p7 = _mm_slli_epi16(psi_i_p5_p7, 1); - psi_i_p5_p5 = _mm_mulhi_epi16(psi_i_p5_p5, ONE_OVER_SQRT_2); - psi_i_p5_p5 = _mm_slli_epi16(psi_i_p5_p5, 1); - psi_i_p5_p3 = _mm_mulhi_epi16(psi_i_p5_p3, ONE_OVER_SQRT_2); - psi_i_p5_p3 = _mm_slli_epi16(psi_i_p5_p3, 1); - psi_i_p5_p1 = _mm_mulhi_epi16(psi_i_p5_p1, ONE_OVER_SQRT_2); - psi_i_p5_p1 = _mm_slli_epi16(psi_i_p5_p1, 1); - psi_i_p5_m1 = _mm_mulhi_epi16(psi_i_p5_m1, ONE_OVER_SQRT_2); - psi_i_p5_m1 = _mm_slli_epi16(psi_i_p5_m1, 1); - psi_i_p5_m3 = _mm_mulhi_epi16(psi_i_p5_m3, ONE_OVER_SQRT_2); - psi_i_p5_m3 = _mm_slli_epi16(psi_i_p5_m3, 1); - psi_i_p5_m5 = _mm_mulhi_epi16(psi_i_p5_m5, ONE_OVER_SQRT_2); - psi_i_p5_m5 = _mm_slli_epi16(psi_i_p5_m5, 1); - psi_i_p5_m7 = _mm_mulhi_epi16(psi_i_p5_m7, ONE_OVER_SQRT_2); - psi_i_p5_m7 = _mm_slli_epi16(psi_i_p5_m7, 1); - psi_i_p3_p7 = _mm_mulhi_epi16(psi_i_p3_p7, ONE_OVER_SQRT_2); - psi_i_p3_p7 = _mm_slli_epi16(psi_i_p3_p7, 1); - psi_i_p3_p5 = _mm_mulhi_epi16(psi_i_p3_p5, ONE_OVER_SQRT_2); - psi_i_p3_p5 = _mm_slli_epi16(psi_i_p3_p5, 1); - psi_i_p3_p3 = _mm_mulhi_epi16(psi_i_p3_p3, ONE_OVER_SQRT_2); - psi_i_p3_p3 = _mm_slli_epi16(psi_i_p3_p3, 1); - psi_i_p3_p1 = _mm_mulhi_epi16(psi_i_p3_p1, ONE_OVER_SQRT_2); - psi_i_p3_p1 = _mm_slli_epi16(psi_i_p3_p1, 1); - psi_i_p3_m1 = _mm_mulhi_epi16(psi_i_p3_m1, ONE_OVER_SQRT_2); - psi_i_p3_m1 = _mm_slli_epi16(psi_i_p3_m1, 1); - psi_i_p3_m3 = _mm_mulhi_epi16(psi_i_p3_m3, ONE_OVER_SQRT_2); - psi_i_p3_m3 = _mm_slli_epi16(psi_i_p3_m3, 1); - psi_i_p3_m5 = _mm_mulhi_epi16(psi_i_p3_m5, ONE_OVER_SQRT_2); - psi_i_p3_m5 = _mm_slli_epi16(psi_i_p3_m5, 1); - psi_i_p3_m7 = _mm_mulhi_epi16(psi_i_p3_m7, ONE_OVER_SQRT_2); - psi_i_p3_m7 = _mm_slli_epi16(psi_i_p3_m7, 1); - psi_i_p1_p7 = _mm_mulhi_epi16(psi_i_p1_p7, ONE_OVER_SQRT_2); - psi_i_p1_p7 = _mm_slli_epi16(psi_i_p1_p7, 1); - psi_i_p1_p5 = _mm_mulhi_epi16(psi_i_p1_p5, ONE_OVER_SQRT_2); - psi_i_p1_p5 = _mm_slli_epi16(psi_i_p1_p5, 1); - psi_i_p1_p3 = _mm_mulhi_epi16(psi_i_p1_p3, ONE_OVER_SQRT_2); - psi_i_p1_p3 = _mm_slli_epi16(psi_i_p1_p3, 1); - psi_i_p1_p1 = _mm_mulhi_epi16(psi_i_p1_p1, ONE_OVER_SQRT_2); - psi_i_p1_p1 = _mm_slli_epi16(psi_i_p1_p1, 1); - psi_i_p1_m1 = _mm_mulhi_epi16(psi_i_p1_m1, ONE_OVER_SQRT_2); - psi_i_p1_m1 = _mm_slli_epi16(psi_i_p1_m1, 1); - psi_i_p1_m3 = _mm_mulhi_epi16(psi_i_p1_m3, ONE_OVER_SQRT_2); - psi_i_p1_m3 = _mm_slli_epi16(psi_i_p1_m3, 1); - psi_i_p1_m5 = _mm_mulhi_epi16(psi_i_p1_m5, ONE_OVER_SQRT_2); - psi_i_p1_m5 = _mm_slli_epi16(psi_i_p1_m5, 1); - psi_i_p1_m7 = _mm_mulhi_epi16(psi_i_p1_m7, ONE_OVER_SQRT_2); - psi_i_p1_m7 = _mm_slli_epi16(psi_i_p1_m7, 1); - psi_i_m1_p7 = _mm_mulhi_epi16(psi_i_m1_p7, ONE_OVER_SQRT_2); - psi_i_m1_p7 = _mm_slli_epi16(psi_i_m1_p7, 1); - psi_i_m1_p5 = _mm_mulhi_epi16(psi_i_m1_p5, ONE_OVER_SQRT_2); - psi_i_m1_p5 = _mm_slli_epi16(psi_i_m1_p5, 1); - psi_i_m1_p3 = _mm_mulhi_epi16(psi_i_m1_p3, ONE_OVER_SQRT_2); - psi_i_m1_p3 = _mm_slli_epi16(psi_i_m1_p3, 1); - psi_i_m1_p1 = _mm_mulhi_epi16(psi_i_m1_p1, ONE_OVER_SQRT_2); - psi_i_m1_p1 = _mm_slli_epi16(psi_i_m1_p1, 1); - psi_i_m1_m1 = _mm_mulhi_epi16(psi_i_m1_m1, ONE_OVER_SQRT_2); - psi_i_m1_m1 = _mm_slli_epi16(psi_i_m1_m1, 1); - psi_i_m1_m3 = _mm_mulhi_epi16(psi_i_m1_m3, ONE_OVER_SQRT_2); - psi_i_m1_m3 = _mm_slli_epi16(psi_i_m1_m3, 1); - psi_i_m1_m5 = _mm_mulhi_epi16(psi_i_m1_m5, ONE_OVER_SQRT_2); - psi_i_m1_m5 = _mm_slli_epi16(psi_i_m1_m5, 1); - psi_i_m1_m7 = _mm_mulhi_epi16(psi_i_m1_m7, ONE_OVER_SQRT_2); - psi_i_m1_m7 = _mm_slli_epi16(psi_i_m1_m7, 1); - psi_i_m3_p7 = _mm_mulhi_epi16(psi_i_m3_p7, ONE_OVER_SQRT_2); - psi_i_m3_p7 = _mm_slli_epi16(psi_i_m3_p7, 1); - psi_i_m3_p5 = _mm_mulhi_epi16(psi_i_m3_p5, ONE_OVER_SQRT_2); - psi_i_m3_p5 = _mm_slli_epi16(psi_i_m3_p5, 1); - psi_i_m3_p3 = _mm_mulhi_epi16(psi_i_m3_p3, ONE_OVER_SQRT_2); - psi_i_m3_p3 = _mm_slli_epi16(psi_i_m3_p3, 1); - psi_i_m3_p1 = _mm_mulhi_epi16(psi_i_m3_p1, ONE_OVER_SQRT_2); - psi_i_m3_p1 = _mm_slli_epi16(psi_i_m3_p1, 1); - psi_i_m3_m1 = _mm_mulhi_epi16(psi_i_m3_m1, ONE_OVER_SQRT_2); - psi_i_m3_m1 = _mm_slli_epi16(psi_i_m3_m1, 1); - psi_i_m3_m3 = _mm_mulhi_epi16(psi_i_m3_m3, ONE_OVER_SQRT_2); - psi_i_m3_m3 = _mm_slli_epi16(psi_i_m3_m3, 1); - psi_i_m3_m5 = _mm_mulhi_epi16(psi_i_m3_m5, ONE_OVER_SQRT_2); - psi_i_m3_m5 = _mm_slli_epi16(psi_i_m3_m5, 1); - psi_i_m3_m7 = _mm_mulhi_epi16(psi_i_m3_m7, ONE_OVER_SQRT_2); - psi_i_m3_m7 = _mm_slli_epi16(psi_i_m3_m7, 1); - psi_i_m5_p7 = _mm_mulhi_epi16(psi_i_m5_p7, ONE_OVER_SQRT_2); - psi_i_m5_p7 = _mm_slli_epi16(psi_i_m5_p7, 1); - psi_i_m5_p5 = _mm_mulhi_epi16(psi_i_m5_p5, ONE_OVER_SQRT_2); - psi_i_m5_p5 = _mm_slli_epi16(psi_i_m5_p5, 1); - psi_i_m5_p3 = _mm_mulhi_epi16(psi_i_m5_p3, ONE_OVER_SQRT_2); - psi_i_m5_p3 = _mm_slli_epi16(psi_i_m5_p3, 1); - psi_i_m5_p1 = _mm_mulhi_epi16(psi_i_m5_p1, ONE_OVER_SQRT_2); - psi_i_m5_p1 = _mm_slli_epi16(psi_i_m5_p1, 1); - psi_i_m5_m1 = _mm_mulhi_epi16(psi_i_m5_m1, ONE_OVER_SQRT_2); - psi_i_m5_m1 = _mm_slli_epi16(psi_i_m5_m1, 1); - psi_i_m5_m3 = _mm_mulhi_epi16(psi_i_m5_m3, ONE_OVER_SQRT_2); - psi_i_m5_m3 = _mm_slli_epi16(psi_i_m5_m3, 1); - psi_i_m5_m5 = _mm_mulhi_epi16(psi_i_m5_m5, ONE_OVER_SQRT_2); - psi_i_m5_m5 = _mm_slli_epi16(psi_i_m5_m5, 1); - psi_i_m5_m7 = _mm_mulhi_epi16(psi_i_m5_m7, ONE_OVER_SQRT_2); - psi_i_m5_m7 = _mm_slli_epi16(psi_i_m5_m7, 1); - psi_i_m7_p7 = _mm_mulhi_epi16(psi_i_m7_p7, ONE_OVER_SQRT_2); - psi_i_m7_p7 = _mm_slli_epi16(psi_i_m7_p7, 1); - psi_i_m7_p5 = _mm_mulhi_epi16(psi_i_m7_p5, ONE_OVER_SQRT_2); - psi_i_m7_p5 = _mm_slli_epi16(psi_i_m7_p5, 1); - psi_i_m7_p3 = _mm_mulhi_epi16(psi_i_m7_p3, ONE_OVER_SQRT_2); - psi_i_m7_p3 = _mm_slli_epi16(psi_i_m7_p3, 1); - psi_i_m7_p1 = _mm_mulhi_epi16(psi_i_m7_p1, ONE_OVER_SQRT_2); - psi_i_m7_p1 = _mm_slli_epi16(psi_i_m7_p1, 1); - psi_i_m7_m1 = _mm_mulhi_epi16(psi_i_m7_m1, ONE_OVER_SQRT_2); - psi_i_m7_m1 = _mm_slli_epi16(psi_i_m7_m1, 1); - psi_i_m7_m3 = _mm_mulhi_epi16(psi_i_m7_m3, ONE_OVER_SQRT_2); - psi_i_m7_m3 = _mm_slli_epi16(psi_i_m7_m3, 1); - psi_i_m7_m5 = _mm_mulhi_epi16(psi_i_m7_m5, ONE_OVER_SQRT_2); - psi_i_m7_m5 = _mm_slli_epi16(psi_i_m7_m5, 1); - psi_i_m7_m7 = _mm_mulhi_epi16(psi_i_m7_m7, ONE_OVER_SQRT_2); - psi_i_m7_m7 = _mm_slli_epi16(psi_i_m7_m7, 1); - - psi_a_p7_p7 = _mm_adds_epi16(psi_r_p7_p7, psi_i_p7_p7); - psi_a_p7_p5 = _mm_adds_epi16(psi_r_p7_p5, psi_i_p7_p5); - psi_a_p7_p3 = _mm_adds_epi16(psi_r_p7_p3, psi_i_p7_p3); - psi_a_p7_p1 = _mm_adds_epi16(psi_r_p7_p1, psi_i_p7_p1); - psi_a_p7_m1 = _mm_adds_epi16(psi_r_p7_m1, psi_i_p7_m1); - psi_a_p7_m3 = _mm_adds_epi16(psi_r_p7_m3, psi_i_p7_m3); - psi_a_p7_m5 = _mm_adds_epi16(psi_r_p7_m5, psi_i_p7_m5); - psi_a_p7_m7 = _mm_adds_epi16(psi_r_p7_m7, psi_i_p7_m7); - psi_a_p5_p7 = _mm_adds_epi16(psi_r_p5_p7, psi_i_p5_p7); - psi_a_p5_p5 = _mm_adds_epi16(psi_r_p5_p5, psi_i_p5_p5); - psi_a_p5_p3 = _mm_adds_epi16(psi_r_p5_p3, psi_i_p5_p3); - psi_a_p5_p1 = _mm_adds_epi16(psi_r_p5_p1, psi_i_p5_p1); - psi_a_p5_m1 = _mm_adds_epi16(psi_r_p5_m1, psi_i_p5_m1); - psi_a_p5_m3 = _mm_adds_epi16(psi_r_p5_m3, psi_i_p5_m3); - psi_a_p5_m5 = _mm_adds_epi16(psi_r_p5_m5, psi_i_p5_m5); - psi_a_p5_m7 = _mm_adds_epi16(psi_r_p5_m7, psi_i_p5_m7); - psi_a_p3_p7 = _mm_adds_epi16(psi_r_p3_p7, psi_i_p3_p7); - psi_a_p3_p5 = _mm_adds_epi16(psi_r_p3_p5, psi_i_p3_p5); - psi_a_p3_p3 = _mm_adds_epi16(psi_r_p3_p3, psi_i_p3_p3); - psi_a_p3_p1 = _mm_adds_epi16(psi_r_p3_p1, psi_i_p3_p1); - psi_a_p3_m1 = _mm_adds_epi16(psi_r_p3_m1, psi_i_p3_m1); - psi_a_p3_m3 = _mm_adds_epi16(psi_r_p3_m3, psi_i_p3_m3); - psi_a_p3_m5 = _mm_adds_epi16(psi_r_p3_m5, psi_i_p3_m5); - psi_a_p3_m7 = _mm_adds_epi16(psi_r_p3_m7, psi_i_p3_m7); - psi_a_p1_p7 = _mm_adds_epi16(psi_r_p1_p7, psi_i_p1_p7); - psi_a_p1_p5 = _mm_adds_epi16(psi_r_p1_p5, psi_i_p1_p5); - psi_a_p1_p3 = _mm_adds_epi16(psi_r_p1_p3, psi_i_p1_p3); - psi_a_p1_p1 = _mm_adds_epi16(psi_r_p1_p1, psi_i_p1_p1); - psi_a_p1_m1 = _mm_adds_epi16(psi_r_p1_m1, psi_i_p1_m1); - psi_a_p1_m3 = _mm_adds_epi16(psi_r_p1_m3, psi_i_p1_m3); - psi_a_p1_m5 = _mm_adds_epi16(psi_r_p1_m5, psi_i_p1_m5); - psi_a_p1_m7 = _mm_adds_epi16(psi_r_p1_m7, psi_i_p1_m7); - psi_a_m1_p7 = _mm_adds_epi16(psi_r_m1_p7, psi_i_m1_p7); - psi_a_m1_p5 = _mm_adds_epi16(psi_r_m1_p5, psi_i_m1_p5); - psi_a_m1_p3 = _mm_adds_epi16(psi_r_m1_p3, psi_i_m1_p3); - psi_a_m1_p1 = _mm_adds_epi16(psi_r_m1_p1, psi_i_m1_p1); - psi_a_m1_m1 = _mm_adds_epi16(psi_r_m1_m1, psi_i_m1_m1); - psi_a_m1_m3 = _mm_adds_epi16(psi_r_m1_m3, psi_i_m1_m3); - psi_a_m1_m5 = _mm_adds_epi16(psi_r_m1_m5, psi_i_m1_m5); - psi_a_m1_m7 = _mm_adds_epi16(psi_r_m1_m7, psi_i_m1_m7); - psi_a_m3_p7 = _mm_adds_epi16(psi_r_m3_p7, psi_i_m3_p7); - psi_a_m3_p5 = _mm_adds_epi16(psi_r_m3_p5, psi_i_m3_p5); - psi_a_m3_p3 = _mm_adds_epi16(psi_r_m3_p3, psi_i_m3_p3); - psi_a_m3_p1 = _mm_adds_epi16(psi_r_m3_p1, psi_i_m3_p1); - psi_a_m3_m1 = _mm_adds_epi16(psi_r_m3_m1, psi_i_m3_m1); - psi_a_m3_m3 = _mm_adds_epi16(psi_r_m3_m3, psi_i_m3_m3); - psi_a_m3_m5 = _mm_adds_epi16(psi_r_m3_m5, psi_i_m3_m5); - psi_a_m3_m7 = _mm_adds_epi16(psi_r_m3_m7, psi_i_m3_m7); - psi_a_m5_p7 = _mm_adds_epi16(psi_r_m5_p7, psi_i_m5_p7); - psi_a_m5_p5 = _mm_adds_epi16(psi_r_m5_p5, psi_i_m5_p5); - psi_a_m5_p3 = _mm_adds_epi16(psi_r_m5_p3, psi_i_m5_p3); - psi_a_m5_p1 = _mm_adds_epi16(psi_r_m5_p1, psi_i_m5_p1); - psi_a_m5_m1 = _mm_adds_epi16(psi_r_m5_m1, psi_i_m5_m1); - psi_a_m5_m3 = _mm_adds_epi16(psi_r_m5_m3, psi_i_m5_m3); - psi_a_m5_m5 = _mm_adds_epi16(psi_r_m5_m5, psi_i_m5_m5); - psi_a_m5_m7 = _mm_adds_epi16(psi_r_m5_m7, psi_i_m5_m7); - psi_a_m7_p7 = _mm_adds_epi16(psi_r_m7_p7, psi_i_m7_p7); - psi_a_m7_p5 = _mm_adds_epi16(psi_r_m7_p5, psi_i_m7_p5); - psi_a_m7_p3 = _mm_adds_epi16(psi_r_m7_p3, psi_i_m7_p3); - psi_a_m7_p1 = _mm_adds_epi16(psi_r_m7_p1, psi_i_m7_p1); - psi_a_m7_m1 = _mm_adds_epi16(psi_r_m7_m1, psi_i_m7_m1); - psi_a_m7_m3 = _mm_adds_epi16(psi_r_m7_m3, psi_i_m7_m3); - psi_a_m7_m5 = _mm_adds_epi16(psi_r_m7_m5, psi_i_m7_m5); - psi_a_m7_m7 = _mm_adds_epi16(psi_r_m7_m7, psi_i_m7_m7); - - // Computing different multiples of ||h0||^2 - // x=1, y=1 - ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); - ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1); - // x=1, y=3 - ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); - ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1); - // x=1, x=5 - ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); - ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1); - // x=1, y=7 - ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); - ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); - // x=3, y=3 - ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); - ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1); - // x=3, y=5 - ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); - ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1); - // x=3, y=7 - ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); - ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2); - // x=5, y=5 - ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); - ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); - // x=5, y=7 - ch_mag_74_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); - ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2); - // x=7, y=7 - ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); - ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2); - - // Computing Metrics - xmm1 = _mm_adds_epi16(psi_a_p7_p7, y0_p_7_7); - bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p7_p5, y0_p_7_5); - bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p7_p3, y0_p_7_3); - bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p7_p1, y0_p_7_1); - bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p7_m1, y0_m_7_1); - bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p7_m3, y0_m_7_3); - bit_met_p7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p7_m5, y0_m_7_5); - bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p7_m7, y0_m_7_7); - bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p5_p7, y0_p_5_7); - bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p5_p5, y0_p_5_5); - bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p5_p3, y0_p_5_3); - bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p5_p1, y0_p_5_1); - bit_met_p5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p5_m1, y0_m_5_1); - bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p5_m3, y0_m_5_3); - bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p5_m5, y0_m_5_5); - bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p5_m7, y0_m_5_7); - bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p3_p7, y0_p_3_7); - bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p3_p5, y0_p_3_5); - bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p3_p3, y0_p_3_3); - bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p3_p1, y0_p_3_1); - bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p3_m1, y0_m_3_1); - bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p3_m3, y0_m_3_3); - bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p3_m5, y0_m_3_5); - bit_met_p3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p3_m7, y0_m_3_7); - bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p1_p7, y0_p_1_7); - bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p1_p5, y0_p_1_5); - bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p1_p3, y0_p_1_3); - bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p1_p1, y0_p_1_1); - bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p1_m1, y0_m_1_1); - bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p1_m3, y0_m_1_3); - bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p1_m5, y0_m_1_5); - bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm1 = _mm_adds_epi16(psi_a_p1_m7, y0_m_1_7); - bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - - xmm1 = _mm_subs_epi16(psi_a_m1_p7, y0_m_1_7); - bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m1_p5, y0_m_1_5); - bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m1_p3, y0_m_1_3); - bit_met_m1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m1_p1, y0_m_1_1); - bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m1_m1, y0_p_1_1); - bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m1_m3, y0_p_1_3); - bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m1_m5, y0_p_1_5); - bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m1_m7, y0_p_1_7); - bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m3_p7, y0_m_3_7); - bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m3_p5, y0_m_3_5); - bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m3_p3, y0_m_3_3); - bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m3_p1, y0_m_3_1); - bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m3_m1, y0_p_3_1); - bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m3_m3, y0_p_3_3); - bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m3_m5, y0_p_3_5); - bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m3_m7, y0_p_3_7); - bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m5_p7, y0_m_5_7); - bit_met_m5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m5_p5, y0_m_5_5); - bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m5_p3, y0_m_5_3); - bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m5_p1, y0_m_5_1); - bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m5_m1, y0_p_5_1); - bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m5_m3, y0_p_5_3); - bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m5_m5, y0_p_5_5); - bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m5_m7, y0_p_5_7); - bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m7_p7, y0_m_7_7); - bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m7_p5, y0_m_7_5); - bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m7_p3, y0_m_7_3); - bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m7_p1, y0_m_7_1); - bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m7_m1, y0_p_7_1); - bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m7_m3, y0_p_7_3); - bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m7_m5, y0_p_7_5); - bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm1 = _mm_subs_epi16(psi_a_m7_m7, y0_p_7_7); - bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - - // Detection for 1st bit (LTE mapping) - // bit = 1 - xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5); - xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1); - xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3); - xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5); - xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1); - xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5); - xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1); - xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5); - xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3); - xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - // bit = 0 - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5); - xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1); - xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3); - xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1); - xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3); - xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1); - xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3); - xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5); - xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3); - xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 2nd bit (LTE mapping) - // bit = 1 - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - // bit = 0 - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 3rd bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); - xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); - xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); - xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); - xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); - xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); - xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); - xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); - xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); - xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); - xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); - xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); - xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); - xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); - xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 4th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - - // Detection for 5th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); - xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); - xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); - xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); - xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); - xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); - xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); - xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); - xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); - xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); - xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); - xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); - xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); - xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); - xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 6th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - - // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs - // RE 1 - j = 24*i; - stream0_out[j + 0] = ((short *)&y0r)[0]; - stream0_out[j + 1] = ((short *)&y1r)[0]; - stream0_out[j + 2] = ((short *)&y2r)[0]; - stream0_out[j + 3] = ((short *)&y0i)[0]; - stream0_out[j + 4] = ((short *)&y1i)[0]; - stream0_out[j + 5] = ((short *)&y2i)[0]; - // RE 2 - stream0_out[j + 6] = ((short *)&y0r)[1]; - stream0_out[j + 7] = ((short *)&y1r)[1]; - stream0_out[j + 8] = ((short *)&y2r)[1]; - stream0_out[j + 9] = ((short *)&y0i)[1]; - stream0_out[j + 10] = ((short *)&y1i)[1]; - stream0_out[j + 11] = ((short *)&y2i)[1]; - // RE 3 - stream0_out[j + 12] = ((short *)&y0r)[2]; - stream0_out[j + 13] = ((short *)&y1r)[2]; - stream0_out[j + 14] = ((short *)&y2r)[2]; - stream0_out[j + 15] = ((short *)&y0i)[2]; - stream0_out[j + 16] = ((short *)&y1i)[2]; - stream0_out[j + 17] = ((short *)&y2i)[2]; - // RE 4 - stream0_out[j + 18] = ((short *)&y0r)[3]; - stream0_out[j + 19] = ((short *)&y1r)[3]; - stream0_out[j + 20] = ((short *)&y2r)[3]; - stream0_out[j + 21] = ((short *)&y0i)[3]; - stream0_out[j + 22] = ((short *)&y1i)[3]; - stream0_out[j + 23] = ((short *)&y2i)[3]; - // RE 5 - stream0_out[j + 24] = ((short *)&y0r)[4]; - stream0_out[j + 25] = ((short *)&y1r)[4]; - stream0_out[j + 26] = ((short *)&y2r)[4]; - stream0_out[j + 27] = ((short *)&y0i)[4]; - stream0_out[j + 28] = ((short *)&y1i)[4]; - stream0_out[j + 29] = ((short *)&y2i)[4]; - // RE 6 - stream0_out[j + 30] = ((short *)&y0r)[5]; - stream0_out[j + 31] = ((short *)&y1r)[5]; - stream0_out[j + 32] = ((short *)&y2r)[5]; - stream0_out[j + 33] = ((short *)&y0i)[5]; - stream0_out[j + 34] = ((short *)&y1i)[5]; - stream0_out[j + 35] = ((short *)&y2i)[5]; - // RE 7 - stream0_out[j + 36] = ((short *)&y0r)[6]; - stream0_out[j + 37] = ((short *)&y1r)[6]; - stream0_out[j + 38] = ((short *)&y2r)[6]; - stream0_out[j + 39] = ((short *)&y0i)[6]; - stream0_out[j + 40] = ((short *)&y1i)[6]; - stream0_out[j + 41] = ((short *)&y2i)[6]; - // RE 8 - stream0_out[j + 42] = ((short *)&y0r)[7]; - stream0_out[j + 43] = ((short *)&y1r)[7]; - stream0_out[j + 44] = ((short *)&y2r)[7]; - stream0_out[j + 45] = ((short *)&y0i)[7]; - stream0_out[j + 46] = ((short *)&y1i)[7]; - stream0_out[j + 47] = ((short *)&y2i)[7]; -#elif defined(__arm__) || defined(__aarch64__) - -#endif - } - -#if defined(__x86_64__) || defined(__i386__) - _mm_empty(); - _m_empty(); -#endif -} - - -int nr_dlsch_64qam_qpsk_llr(NR_DL_FRAME_PARMS *frame_parms, - int32_t **rxdataF_comp, - int32_t **rxdataF_comp_i, - int32_t **dl_ch_mag, - int32_t **rho_i, - int16_t *dlsch_llr, - uint8_t symbol, - uint8_t first_symbol_flag, - uint16_t nb_rb, - uint16_t pbch_pss_sss_adjust, - int16_t **llr16p) -{ - - int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *rho = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *llr16; - int len; - uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? (symbol-(7-frame_parms->Ncp)) : symbol; - - //first symbol has different structure due to more pilots - if (first_symbol_flag == 1) { - llr16 = (int16_t*)dlsch_llr; - } else { - llr16 = (int16_t*)(*llr16p); - } - - AssertFatal(llr16!=NULL,"nr_dlsch_16qam_64qam_llr:llr is null, symbol %d\n",symbol); - - if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { - // if symbol has pilots - if (frame_parms->nb_antenna_ports_gNB!=1) - // in 2 antenna ports we have 8 REs per symbol per RB - len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); - else - // for 1 antenna port we have 10 REs per symbol per RB - len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); - } else { - // symbol has no pilots - len = (nb_rb*12) - pbch_pss_sss_adjust; - } - - nr_qam64_qpsk((short *)rxF, - (short *)rxF_i, - (short *)ch_mag, - (short *)llr16, - (short *)rho, - len); - - llr16 += (6*len); - *llr16p = (short *)llr16; - return(0); -} - - - -void nr_qam64_qam16(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *ch_mag_i, - short *stream0_out, - short *rho01, - int length - ) -{ - - /* - Author: S. Wagner - Date: 31-07-12 - - Input: - stream0_in: MF filter for 1st stream, i.e., y0=h0'*y - stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y - ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - rho01: Channel cross correlation, i.e., h1'*h0 - - Output: - stream0_out: output LLRs for 1st stream - */ - -#if defined(__x86_64__) || defined(__i386__) - - __m128i *rho01_128i = (__m128i *)rho01; - __m128i *stream0_128i_in = (__m128i *)stream0_in; - __m128i *stream1_128i_in = (__m128i *)stream1_in; - __m128i *ch_mag_128i = (__m128i *)ch_mag; - __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; - - __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16) - __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16) - __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15) - __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(5/sqrt(42)*2^15) - __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14 - __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14 - __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15) - __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14 - __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15) - __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15) - __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15) - __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15) - __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15) - __m128i ONE_OVER_SQRT_10_Q15 = _mm_set1_epi16(10362); // round(1/sqrt(10)*2^15) - __m128i THREE_OVER_SQRT_10 = _mm_set1_epi16(31086); // round(3/sqrt(10)*2^15) - __m128i SQRT_10_OVER_FOUR = _mm_set1_epi16(25905); // round(sqrt(10)/4*2^15) - - - __m128i ch_mag_int; - __m128i ch_mag_des; - __m128i ch_mag_98_over_42_with_sigma2; - __m128i ch_mag_74_over_42_with_sigma2; - __m128i ch_mag_58_over_42_with_sigma2; - __m128i ch_mag_50_over_42_with_sigma2; - __m128i ch_mag_34_over_42_with_sigma2; - __m128i ch_mag_18_over_42_with_sigma2; - __m128i ch_mag_26_over_42_with_sigma2; - __m128i ch_mag_10_over_42_with_sigma2; - __m128i ch_mag_2_over_42_with_sigma2; - __m128i y0r_one_over_sqrt_21; - __m128i y0r_three_over_sqrt_21; - __m128i y0r_five_over_sqrt_21; - __m128i y0r_seven_over_sqrt_21; - __m128i y0i_one_over_sqrt_21; - __m128i y0i_three_over_sqrt_21; - __m128i y0i_five_over_sqrt_21; - __m128i y0i_seven_over_sqrt_21; - -#elif defined(__arm__) || defined(__aarch64__) - -#endif - int i,j; - - - - for (i=0; i<length>>2; i+=2) { - -#if defined(__x86_64__) || defined(__i386__) - // Get rho - xmm0 = rho01_128i[i]; - xmm1 = rho01_128i[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) - - // Compute the different rhos - rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); - rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); - rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); - rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); - rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); - rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); - rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); - rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); - - rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1); - rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1); - rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2); - rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2); - - xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); - xmm5 = _mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); - xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); - xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); - xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); - xmm7 = _mm_slli_epi16(xmm7, 1); - xmm8 = _mm_slli_epi16(xmm8, 2); - - rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7); - rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); - rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7); - rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); - xmm4 = _mm_slli_epi16(xmm4, 1); - rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); - xmm4 = _mm_slli_epi16(xmm4, 2); - rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_7_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7); - - // Rearrange interfering MF output - xmm0 = stream1_128i_in[i]; - xmm1 = stream1_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - - // Psi_r calculation from rho_rpi or rho_rmi - xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16 - xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); - psi_r_p7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); - psi_r_p7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); - psi_r_p7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r); - psi_r_p7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r); - psi_r_p7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r); - psi_r_p7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r); - psi_r_p7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r); - psi_r_p7_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r); - psi_r_p5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r); - psi_r_p5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r); - psi_r_p5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r); - psi_r_p5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r); - psi_r_p5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r); - psi_r_p5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r); - psi_r_p5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r); - psi_r_p5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r); - psi_r_p3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r); - psi_r_p3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r); - psi_r_p3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r); - psi_r_p3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1r); - psi_r_p3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r); - psi_r_p3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r); - psi_r_p3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r); - psi_r_p3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r); - psi_r_p1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r); - psi_r_p1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r); - psi_r_p1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r); - psi_r_p1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r); - psi_r_p1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r); - psi_r_p1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r); - psi_r_p1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r); - psi_r_p1_m7 = _mm_abs_epi16(xmm2); - - xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r); - psi_r_m1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r); - psi_r_m1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r); - psi_r_m1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r); - psi_r_m1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r); - psi_r_m1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r); - psi_r_m1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r); - psi_r_m1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r); - psi_r_m1_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r); - psi_r_m3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r); - psi_r_m3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r); - psi_r_m3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r); - psi_r_m3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r); - psi_r_m3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1r); - psi_r_m3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r); - psi_r_m3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r); - psi_r_m3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r); - psi_r_m5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r); - psi_r_m5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r); - psi_r_m5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r); - psi_r_m5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r); - psi_r_m5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r); - psi_r_m5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r); - psi_r_m5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r); - psi_r_m5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r); - psi_r_m7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r); - psi_r_m7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r); - psi_r_m7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r); - psi_r_m7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r); - psi_r_m7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r); - psi_r_m7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r); - psi_r_m7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r); - psi_r_m7_m7 = _mm_abs_epi16(xmm2); - - // Psi_i calculation from rho_rpi or rho_rmi - xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i); - psi_i_p7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i); - psi_i_p7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i); - psi_i_p7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i); - psi_i_p7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i); - psi_i_p7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1i); - psi_i_p7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i); - psi_i_p7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i); - psi_i_p7_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i); - psi_i_p5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i); - psi_i_p5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i); - psi_i_p5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i); - psi_i_p5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i); - psi_i_p5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i); - psi_i_p5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i); - psi_i_p5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i); - psi_i_p5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i); - psi_i_p3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i); - psi_i_p3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i); - psi_i_p3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i); - psi_i_p3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i); - psi_i_p3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i); - psi_i_p3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i); - psi_i_p3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1i); - psi_i_p3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i); - psi_i_p1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i); - psi_i_p1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i); - psi_i_p1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i); - psi_i_p1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i); - psi_i_p1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i); - psi_i_p1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1i); - psi_i_p1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i); - psi_i_p1_m7 = _mm_abs_epi16(xmm2); - - xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i); - psi_i_m1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i); - psi_i_m1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i); - psi_i_m1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i); - psi_i_m1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i); - psi_i_m1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i); - psi_i_m1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i); - psi_i_m1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i); - psi_i_m1_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i); - psi_i_m3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i); - psi_i_m3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i); - psi_i_m3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i); - psi_i_m3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i); - psi_i_m3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i); - psi_i_m3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i); - psi_i_m3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i); - psi_i_m3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i); - psi_i_m5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i); - psi_i_m5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i); - psi_i_m5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i); - psi_i_m5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i); - psi_i_m5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i); - psi_i_m5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i); - psi_i_m5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1i); - psi_i_m5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i); - psi_i_m7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i); - psi_i_m7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i); - psi_i_m7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i); - psi_i_m7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i); - psi_i_m7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i); - psi_i_m7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i); - psi_i_m7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i); - psi_i_m7_m7 = _mm_abs_epi16(xmm2); - - - // Rearrange desired MF output - xmm0 = stream0_128i_in[i]; - xmm1 = stream0_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); - - // Rearrange desired channel magnitudes - xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) - xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); - - // Rearrange interfering channel magnitudes - xmm2 = ch_mag_128i_i[i]; - xmm3 = ch_mag_128i_i[i+1]; - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); - - y0r_one_over_sqrt_21 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42); - y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42); - y0r_five_over_sqrt_21 = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); - y0r_five_over_sqrt_21 = _mm_slli_epi16(y0r_five_over_sqrt_21, 1); - y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); - y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 - - y0i_one_over_sqrt_21 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42); - y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42); - y0i_five_over_sqrt_21 = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); - y0i_five_over_sqrt_21 = _mm_slli_epi16(y0i_five_over_sqrt_21, 1); - y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); - y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 - - y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_1_5 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); - - y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_5_5 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_7_7 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); - - interference_abs_epi16(psi_r_p7_p7, ch_mag_int, a_r_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p7_p5, ch_mag_int, a_r_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p7_p3, ch_mag_int, a_r_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p7_p1, ch_mag_int, a_r_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p7_m1, ch_mag_int, a_r_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p7_m3, ch_mag_int, a_r_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p7_m5, ch_mag_int, a_r_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p7_m7, ch_mag_int, a_r_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p5_p7, ch_mag_int, a_r_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p5_p5, ch_mag_int, a_r_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p5_p3, ch_mag_int, a_r_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p5_p1, ch_mag_int, a_r_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p5_m1, ch_mag_int, a_r_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p5_m3, ch_mag_int, a_r_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p5_m5, ch_mag_int, a_r_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p5_m7, ch_mag_int, a_r_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_p7, ch_mag_int, a_r_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_p5, ch_mag_int, a_r_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_p3, ch_mag_int, a_r_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_p1, ch_mag_int, a_r_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_m1, ch_mag_int, a_r_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_m3, ch_mag_int, a_r_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_m5, ch_mag_int, a_r_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p3_m7, ch_mag_int, a_r_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_p7, ch_mag_int, a_r_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_p5, ch_mag_int, a_r_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_p3, ch_mag_int, a_r_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_p1, ch_mag_int, a_r_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_m1, ch_mag_int, a_r_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_m3, ch_mag_int, a_r_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_m5, ch_mag_int, a_r_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_p1_m7, ch_mag_int, a_r_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_p7, ch_mag_int, a_r_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_p5, ch_mag_int, a_r_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_p3, ch_mag_int, a_r_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_p1, ch_mag_int, a_r_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_m1, ch_mag_int, a_r_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_m3, ch_mag_int, a_r_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_m5, ch_mag_int, a_r_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m1_m7, ch_mag_int, a_r_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_p7, ch_mag_int, a_r_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_p5, ch_mag_int, a_r_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_p3, ch_mag_int, a_r_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_p1, ch_mag_int, a_r_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_m1, ch_mag_int, a_r_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_m3, ch_mag_int, a_r_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_m5, ch_mag_int, a_r_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m3_m7, ch_mag_int, a_r_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m5_p7, ch_mag_int, a_r_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m5_p5, ch_mag_int, a_r_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m5_p3, ch_mag_int, a_r_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m5_p1, ch_mag_int, a_r_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m5_m1, ch_mag_int, a_r_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m5_m3, ch_mag_int, a_r_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m5_m5, ch_mag_int, a_r_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m5_m7, ch_mag_int, a_r_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m7_p7, ch_mag_int, a_r_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m7_p5, ch_mag_int, a_r_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m7_p3, ch_mag_int, a_r_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m7_p1, ch_mag_int, a_r_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m7_m1, ch_mag_int, a_r_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m7_m3, ch_mag_int, a_r_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m7_m5, ch_mag_int, a_r_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_r_m7_m7, ch_mag_int, a_r_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - - interference_abs_epi16(psi_i_p7_p7, ch_mag_int, a_i_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p7_p5, ch_mag_int, a_i_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p7_p3, ch_mag_int, a_i_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p7_p1, ch_mag_int, a_i_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p7_m1, ch_mag_int, a_i_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p7_m3, ch_mag_int, a_i_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p7_m5, ch_mag_int, a_i_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p7_m7, ch_mag_int, a_i_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p5_p7, ch_mag_int, a_i_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p5_p5, ch_mag_int, a_i_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p5_p3, ch_mag_int, a_i_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p5_p1, ch_mag_int, a_i_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p5_m1, ch_mag_int, a_i_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p5_m3, ch_mag_int, a_i_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p5_m5, ch_mag_int, a_i_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p5_m7, ch_mag_int, a_i_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_p7, ch_mag_int, a_i_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_p5, ch_mag_int, a_i_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_p3, ch_mag_int, a_i_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_p1, ch_mag_int, a_i_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_m1, ch_mag_int, a_i_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_m3, ch_mag_int, a_i_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_m5, ch_mag_int, a_i_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p3_m7, ch_mag_int, a_i_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_p7, ch_mag_int, a_i_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_p5, ch_mag_int, a_i_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_p3, ch_mag_int, a_i_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_p1, ch_mag_int, a_i_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_m1, ch_mag_int, a_i_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_m3, ch_mag_int, a_i_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_m5, ch_mag_int, a_i_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_p1_m7, ch_mag_int, a_i_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_p7, ch_mag_int, a_i_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_p5, ch_mag_int, a_i_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_p3, ch_mag_int, a_i_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_p1, ch_mag_int, a_i_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_m1, ch_mag_int, a_i_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_m3, ch_mag_int, a_i_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_m5, ch_mag_int, a_i_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m1_m7, ch_mag_int, a_i_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_p7, ch_mag_int, a_i_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_p5, ch_mag_int, a_i_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_p3, ch_mag_int, a_i_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_p1, ch_mag_int, a_i_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_m1, ch_mag_int, a_i_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_m3, ch_mag_int, a_i_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_m5, ch_mag_int, a_i_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m3_m7, ch_mag_int, a_i_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m5_p7, ch_mag_int, a_i_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m5_p5, ch_mag_int, a_i_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m5_p3, ch_mag_int, a_i_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m5_p1, ch_mag_int, a_i_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m5_m1, ch_mag_int, a_i_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m5_m3, ch_mag_int, a_i_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m5_m5, ch_mag_int, a_i_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m5_m7, ch_mag_int, a_i_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m7_p7, ch_mag_int, a_i_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m7_p5, ch_mag_int, a_i_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m7_p3, ch_mag_int, a_i_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m7_p1, ch_mag_int, a_i_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m7_m1, ch_mag_int, a_i_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m7_m3, ch_mag_int, a_i_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m7_m5, ch_mag_int, a_i_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - interference_abs_epi16(psi_i_m7_m7, ch_mag_int, a_i_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); - - // Calculation of a group of two terms in the bit metric involving product of psi and interference - prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); - prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); - prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); - prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); - prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); - prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); - prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); - prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); - prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); - prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); - prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); - prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); - prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); - prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); - prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); - prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); - prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); - prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); - prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); - prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); - prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); - prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); - prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); - prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); - prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); - prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); - prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); - prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); - prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); - prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); - prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); - prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); - prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); - prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); - prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); - prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); - prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); - prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); - prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); - prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); - prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); - prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); - prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); - prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); - prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); - prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); - prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); - prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); - prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); - prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); - prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); - prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); - prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); - prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); - prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); - prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); - prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); - prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); - prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); - prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); - prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); - prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); - prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); - prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); - - // Calculation of a group of two terms in the bit metric involving squares of interference - square_a_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p7); - square_a_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p5); - square_a_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p3); - square_a_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p1); - square_a_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m1); - square_a_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m3); - square_a_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m5); - square_a_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m7); - square_a_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p7); - square_a_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p5); - square_a_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p3); - square_a_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p1); - square_a_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m1); - square_a_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m3); - square_a_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m5); - square_a_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m7); - square_a_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p7); - square_a_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p5); - square_a_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p3); - square_a_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p1); - square_a_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m1); - square_a_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m3); - square_a_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m5); - square_a_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m7); - square_a_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p7); - square_a_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p5); - square_a_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p3); - square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1); - square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1); - square_a_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m3); - square_a_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m5); - square_a_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m7); - square_a_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p7); - square_a_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p5); - square_a_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p3); - square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1); - square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1); - square_a_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m3); - square_a_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m5); - square_a_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m7); - square_a_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p7); - square_a_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p5); - square_a_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p3); - square_a_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p1); - square_a_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m1); - square_a_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m3); - square_a_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m5); - square_a_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m7); - square_a_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p7); - square_a_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p5); - square_a_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p3); - square_a_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p1); - square_a_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m1); - square_a_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m3); - square_a_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m5); - square_a_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m7); - square_a_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p7); - square_a_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p5); - square_a_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p3); - square_a_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p1); - square_a_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m1); - square_a_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m3); - square_a_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m5); - square_a_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m7); - - // Computing different multiples of ||h0||^2 - // x=1, y=1 - ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); - ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1); - // x=1, y=3 - ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); - ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1); - // x=1, x=5 - ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); - ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1); - // x=1, y=7 - ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); - ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); - // x=3, y=3 - ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); - ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1); - // x=3, y=5 - ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); - ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1); - // x=3, y=7 - ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); - ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2); - // x=5, y=5 - ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); - ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); - // x=5, y=7 - ch_mag_74_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); - ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2); - // x=7, y=7 - ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); - ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2); - - // Computing Metrics - xmm0 = _mm_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); - xmm1 = _mm_adds_epi16(xmm0, y0_p_7_7); - bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); - xmm1 = _mm_adds_epi16(xmm0, y0_p_7_5); - bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); - xmm1 = _mm_adds_epi16(xmm0, y0_p_7_3); - bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); - xmm1 = _mm_adds_epi16(xmm0, y0_p_7_1); - bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); - xmm1 = _mm_adds_epi16(xmm0, y0_m_7_1); - bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); - xmm1 = _mm_adds_epi16(xmm0, y0_m_7_3); - bit_met_p7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); - xmm1 = _mm_adds_epi16(xmm0, y0_m_7_5); - bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); - xmm1 = _mm_adds_epi16(xmm0, y0_m_7_7); - bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); - xmm1 = _mm_adds_epi16(xmm0, y0_p_5_7); - bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); - xmm1 = _mm_adds_epi16(xmm0, y0_p_5_5); - bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); - xmm1 = _mm_adds_epi16(xmm0, y0_p_5_3); - bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); - xmm1 = _mm_adds_epi16(xmm0, y0_p_5_1); - bit_met_p5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); - xmm1 = _mm_adds_epi16(xmm0, y0_m_5_1); - bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); - xmm1 = _mm_adds_epi16(xmm0, y0_m_5_3); - bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); - xmm1 = _mm_adds_epi16(xmm0, y0_m_5_5); - bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); - xmm1 = _mm_adds_epi16(xmm0, y0_m_5_7); - bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); - xmm1 = _mm_adds_epi16(xmm0, y0_p_3_7); - bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); - xmm1 = _mm_adds_epi16(xmm0, y0_p_3_5); - bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); - xmm1 = _mm_adds_epi16(xmm0, y0_p_3_3); - bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); - xmm1 = _mm_adds_epi16(xmm0, y0_p_3_1); - bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); - xmm1 = _mm_adds_epi16(xmm0, y0_m_3_1); - bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); - xmm1 = _mm_adds_epi16(xmm0, y0_m_3_3); - bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); - xmm1 = _mm_adds_epi16(xmm0, y0_m_3_5); - bit_met_p3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); - xmm1 = _mm_adds_epi16(xmm0, y0_m_3_7); - bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); - xmm1 = _mm_adds_epi16(xmm0, y0_p_1_7); - bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); - xmm1 = _mm_adds_epi16(xmm0, y0_p_1_5); - bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); - xmm1 = _mm_adds_epi16(xmm0, y0_p_1_3); - bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); - xmm1 = _mm_adds_epi16(xmm0, y0_p_1_1); - bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); - xmm1 = _mm_adds_epi16(xmm0, y0_m_1_1); - bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); - xmm1 = _mm_adds_epi16(xmm0, y0_m_1_3); - bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); - xmm1 = _mm_adds_epi16(xmm0, y0_m_1_5); - bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); - xmm1 = _mm_adds_epi16(xmm0, y0_m_1_7); - bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - - xmm0 = _mm_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); - xmm1 = _mm_subs_epi16(xmm0, y0_m_1_7); - bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); - xmm1 = _mm_subs_epi16(xmm0, y0_m_1_5); - bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); - xmm1 = _mm_subs_epi16(xmm0, y0_m_1_3); - bit_met_m1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); - xmm1 = _mm_subs_epi16(xmm0, y0_m_1_1); - bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); - xmm1 = _mm_subs_epi16(xmm0, y0_p_1_1); - bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); - xmm1 = _mm_subs_epi16(xmm0, y0_p_1_3); - bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); - xmm1 = _mm_subs_epi16(xmm0, y0_p_1_5); - bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); - xmm1 = _mm_subs_epi16(xmm0, y0_p_1_7); - bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); - xmm1 = _mm_subs_epi16(xmm0, y0_m_3_7); - bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); - xmm1 = _mm_subs_epi16(xmm0, y0_m_3_5); - bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); - xmm1 = _mm_subs_epi16(xmm0, y0_m_3_3); - bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); - xmm1 = _mm_subs_epi16(xmm0, y0_m_3_1); - bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); - xmm1 = _mm_subs_epi16(xmm0, y0_p_3_1); - bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); - xmm1 = _mm_subs_epi16(xmm0, y0_p_3_3); - bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); - xmm1 = _mm_subs_epi16(xmm0, y0_p_3_5); - bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); - xmm1 = _mm_subs_epi16(xmm0, y0_p_3_7); - bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); - xmm1 = _mm_subs_epi16(xmm0, y0_m_5_7); - bit_met_m5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); - xmm1 = _mm_subs_epi16(xmm0, y0_m_5_5); - bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); - xmm1 = _mm_subs_epi16(xmm0, y0_m_5_3); - bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); - xmm1 = _mm_subs_epi16(xmm0, y0_m_5_1); - bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); - xmm1 = _mm_subs_epi16(xmm0, y0_p_5_1); - bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); - xmm1 = _mm_subs_epi16(xmm0, y0_p_5_3); - bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); - xmm1 = _mm_subs_epi16(xmm0, y0_p_5_5); - bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); - xmm1 = _mm_subs_epi16(xmm0, y0_p_5_7); - bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); - xmm1 = _mm_subs_epi16(xmm0, y0_m_7_7); - bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); - xmm1 = _mm_subs_epi16(xmm0, y0_m_7_5); - bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); - xmm1 = _mm_subs_epi16(xmm0, y0_m_7_3); - bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); - xmm1 = _mm_subs_epi16(xmm0, y0_m_7_1); - bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); - xmm1 = _mm_subs_epi16(xmm0, y0_p_7_1); - bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); - xmm1 = _mm_subs_epi16(xmm0, y0_p_7_3); - bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); - xmm1 = _mm_subs_epi16(xmm0, y0_p_7_5); - bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); - xmm1 = _mm_subs_epi16(xmm0, y0_p_7_7); - bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - - // Detection for 1st bit (LTE mapping) - // bit = 1 - xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5); - xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1); - xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3); - xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5); - xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1); - xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5); - xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1); - xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5); - xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3); - xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - // bit = 0 - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5); - xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1); - xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3); - xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1); - xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3); - xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1); - xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3); - xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5); - xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3); - xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 2nd bit (LTE mapping) - // bit = 1 - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - // bit = 0 - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 3rd bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); - xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); - xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); - xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); - xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); - xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); - xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); - xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); - xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); - xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); - xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); - xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); - xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); - xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); - xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 4th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - - // Detection for 5th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); - xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); - xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); - xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); - xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); - xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); - xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); - xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); - xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); - xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); - xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); - xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); - xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); - xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); - xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 6th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - - // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs - // RE 1 - j = 24*i; - stream0_out[j + 0] = ((short *)&y0r)[0]; - stream0_out[j + 1] = ((short *)&y1r)[0]; - stream0_out[j + 2] = ((short *)&y2r)[0]; - stream0_out[j + 3] = ((short *)&y0i)[0]; - stream0_out[j + 4] = ((short *)&y1i)[0]; - stream0_out[j + 5] = ((short *)&y2i)[0]; - // RE 2 - stream0_out[j + 6] = ((short *)&y0r)[1]; - stream0_out[j + 7] = ((short *)&y1r)[1]; - stream0_out[j + 8] = ((short *)&y2r)[1]; - stream0_out[j + 9] = ((short *)&y0i)[1]; - stream0_out[j + 10] = ((short *)&y1i)[1]; - stream0_out[j + 11] = ((short *)&y2i)[1]; - // RE 3 - stream0_out[j + 12] = ((short *)&y0r)[2]; - stream0_out[j + 13] = ((short *)&y1r)[2]; - stream0_out[j + 14] = ((short *)&y2r)[2]; - stream0_out[j + 15] = ((short *)&y0i)[2]; - stream0_out[j + 16] = ((short *)&y1i)[2]; - stream0_out[j + 17] = ((short *)&y2i)[2]; - // RE 4 - stream0_out[j + 18] = ((short *)&y0r)[3]; - stream0_out[j + 19] = ((short *)&y1r)[3]; - stream0_out[j + 20] = ((short *)&y2r)[3]; - stream0_out[j + 21] = ((short *)&y0i)[3]; - stream0_out[j + 22] = ((short *)&y1i)[3]; - stream0_out[j + 23] = ((short *)&y2i)[3]; - // RE 5 - stream0_out[j + 24] = ((short *)&y0r)[4]; - stream0_out[j + 25] = ((short *)&y1r)[4]; - stream0_out[j + 26] = ((short *)&y2r)[4]; - stream0_out[j + 27] = ((short *)&y0i)[4]; - stream0_out[j + 28] = ((short *)&y1i)[4]; - stream0_out[j + 29] = ((short *)&y2i)[4]; - // RE 6 - stream0_out[j + 30] = ((short *)&y0r)[5]; - stream0_out[j + 31] = ((short *)&y1r)[5]; - stream0_out[j + 32] = ((short *)&y2r)[5]; - stream0_out[j + 33] = ((short *)&y0i)[5]; - stream0_out[j + 34] = ((short *)&y1i)[5]; - stream0_out[j + 35] = ((short *)&y2i)[5]; - // RE 7 - stream0_out[j + 36] = ((short *)&y0r)[6]; - stream0_out[j + 37] = ((short *)&y1r)[6]; - stream0_out[j + 38] = ((short *)&y2r)[6]; - stream0_out[j + 39] = ((short *)&y0i)[6]; - stream0_out[j + 40] = ((short *)&y1i)[6]; - stream0_out[j + 41] = ((short *)&y2i)[6]; - // RE 8 - stream0_out[j + 42] = ((short *)&y0r)[7]; - stream0_out[j + 43] = ((short *)&y1r)[7]; - stream0_out[j + 44] = ((short *)&y2r)[7]; - stream0_out[j + 45] = ((short *)&y0i)[7]; - stream0_out[j + 46] = ((short *)&y1i)[7]; - stream0_out[j + 47] = ((short *)&y2i)[7]; - -#elif defined(__arm__) || defined(__aarch64__) - -#endif - } - -#if defined(__x86_64__) || defined(__i386__) - _mm_empty(); - _m_empty(); -#endif - -} - - -int nr_dlsch_64qam_16qam_llr(NR_DL_FRAME_PARMS *frame_parms, - int32_t **rxdataF_comp, - int32_t **rxdataF_comp_i, - int32_t **dl_ch_mag, - int32_t **dl_ch_mag_i, - int32_t **rho_i, - int16_t *dlsch_llr, - uint8_t symbol, - uint8_t first_symbol_flag, - uint16_t nb_rb, - uint16_t pbch_pss_sss_adjust, - int16_t **llr16p) -{ - - int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *rho = (int16_t*)&rho_i[0][(symbol*frame_parms->N_RB_DL*12)]; - int16_t *llr16; - int len; - uint8_t symbol_mod = (symbol >= (7-frame_parms->Ncp))? (symbol-(7-frame_parms->Ncp)) : symbol; - - //first symbol has different structure due to more pilots - if (first_symbol_flag == 1) { - llr16 = (int16_t*)dlsch_llr; - } else { - llr16 = (int16_t*)(*llr16p); - } - - AssertFatal(llr16!=NULL,"nr_dlsch_16qam_64qam_llr:llr is null, symbol %d\n",symbol); - - if ((symbol_mod==0) || (symbol_mod==(4-frame_parms->Ncp))) { - // if symbol has pilots - if (frame_parms->nb_antenna_ports_gNB!=1) - // in 2 antenna ports we have 8 REs per symbol per RB - len = (nb_rb*8) - (2*pbch_pss_sss_adjust/3); - else - // for 1 antenna port we have 10 REs per symbol per RB - len = (nb_rb*10) - (5*pbch_pss_sss_adjust/6); - } else { - // symbol has no pilots - len = (nb_rb*12) - pbch_pss_sss_adjust; - } - - nr_qam64_qam16((short *)rxF, - (short *)rxF_i, - (short *)ch_mag, - (short *)ch_mag_i, - (short *)llr16, - (short *)rho, - len); - - llr16 += (6*len); - *llr16p = (short *)llr16; - return(0); -} - -#if 0 -void qam64_qam64(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *ch_mag_i, - short *stream0_out, - short *rho01, - int length - ) -{ - - /* - Author: S. Wagner - Date: 31-07-12 - - Input: - stream0_in: MF filter for 1st stream, i.e., y0=h0'*y - stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y - ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc - rho01: Channel cross correlation, i.e., h1'*h0 - - Output: - stream0_out: output LLRs for 1st stream - */ - -#if defined(__x86_64__) || defined(__i386__) - - __m128i *rho01_128i = (__m128i *)rho01; - __m128i *stream0_128i_in = (__m128i *)stream0_in; - __m128i *stream1_128i_in = (__m128i *)stream1_in; - __m128i *ch_mag_128i = (__m128i *)ch_mag; - __m128i *ch_mag_128i_i = (__m128i *)ch_mag_i; - - __m128i ONE_OVER_SQRT_42 = _mm_set1_epi16(10112); // round(1/sqrt(42)*2^16) - __m128i THREE_OVER_SQRT_42 = _mm_set1_epi16(30337); // round(3/sqrt(42)*2^16) - __m128i FIVE_OVER_SQRT_42 = _mm_set1_epi16(25281); // round(5/sqrt(42)*2^15) - __m128i SEVEN_OVER_SQRT_42 = _mm_set1_epi16(17697); // round(7/sqrt(42)*2^14) Q2.14 - __m128i ONE_OVER_SQRT_2 = _mm_set1_epi16(23170); // round(1/sqrt(2)*2^15) - __m128i ONE_OVER_SQRT_2_42 = _mm_set1_epi16(3575); // round(1/sqrt(2*42)*2^15) - __m128i THREE_OVER_SQRT_2_42 = _mm_set1_epi16(10726); // round(3/sqrt(2*42)*2^15) - __m128i FIVE_OVER_SQRT_2_42 = _mm_set1_epi16(17876); // round(5/sqrt(2*42)*2^15) - __m128i SEVEN_OVER_SQRT_2_42 = _mm_set1_epi16(25027); // round(7/sqrt(2*42)*2^15) - __m128i FORTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(30969); // round(49/(4*sqrt(42))*2^14), Q2.14 - __m128i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(23385); // round(37/(4*sqrt(42))*2^14), Q2.14 - __m128i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(31601); // round(25/(4*sqrt(42))*2^15) - __m128i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(18329); // round(29/(4*sqrt(42))*2^15), Q2.14 - __m128i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(21489); // round(17/(4*sqrt(42))*2^15) - __m128i NINE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(11376); // round(9/(4*sqrt(42))*2^15) - __m128i THIRTEEN_OVER_FOUR_SQRT_42 = _mm_set1_epi16(16433); // round(13/(4*sqrt(42))*2^15) - __m128i FIVE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(6320); // round(5/(4*sqrt(42))*2^15) - __m128i ONE_OVER_FOUR_SQRT_42 = _mm_set1_epi16(1264); // round(1/(4*sqrt(42))*2^15) - __m128i SQRT_42_OVER_FOUR = _mm_set1_epi16(13272); // round(sqrt(42)/4*2^13), Q3.12 - - __m128i ch_mag_des; - __m128i ch_mag_int; - __m128i ch_mag_98_over_42_with_sigma2; - __m128i ch_mag_74_over_42_with_sigma2; - __m128i ch_mag_58_over_42_with_sigma2; - __m128i ch_mag_50_over_42_with_sigma2; - __m128i ch_mag_34_over_42_with_sigma2; - __m128i ch_mag_18_over_42_with_sigma2; - __m128i ch_mag_26_over_42_with_sigma2; - __m128i ch_mag_10_over_42_with_sigma2; - __m128i ch_mag_2_over_42_with_sigma2; - __m128i y0r_one_over_sqrt_21; - __m128i y0r_three_over_sqrt_21; - __m128i y0r_five_over_sqrt_21; - __m128i y0r_seven_over_sqrt_21; - __m128i y0i_one_over_sqrt_21; - __m128i y0i_three_over_sqrt_21; - __m128i y0i_five_over_sqrt_21; - __m128i y0i_seven_over_sqrt_21; - __m128i ch_mag_int_with_sigma2; - __m128i two_ch_mag_int_with_sigma2; - __m128i three_ch_mag_int_with_sigma2; -#elif defined(__arm__) || defined(__aarch64__) - -#endif - - int i,j; - - - for (i=0; i<length>>2; i+=2) { - -#if defined(__x86_64__) || defined(__i386__) - - // Get rho - xmm0 = rho01_128i[i]; - xmm1 = rho01_128i[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) - xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) - rho_rpi = _mm_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) - rho_rmi = _mm_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) - - // Compute the different rhos - rho_rpi_1_1 = _mm_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); - rho_rmi_1_1 = _mm_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); - rho_rpi_3_3 = _mm_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); - rho_rmi_3_3 = _mm_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); - rho_rpi_5_5 = _mm_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); - rho_rmi_5_5 = _mm_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); - rho_rpi_7_7 = _mm_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); - rho_rmi_7_7 = _mm_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); - - rho_rpi_5_5 = _mm_slli_epi16(rho_rpi_5_5, 1); - rho_rmi_5_5 = _mm_slli_epi16(rho_rmi_5_5, 1); - rho_rpi_7_7 = _mm_slli_epi16(rho_rpi_7_7, 2); - rho_rmi_7_7 = _mm_slli_epi16(rho_rmi_7_7, 2); - - xmm4 = _mm_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); - xmm5 = _mm_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); - xmm6 = _mm_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); - xmm7 = _mm_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); - xmm8 = _mm_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); - xmm7 = _mm_slli_epi16(xmm7, 1); - xmm8 = _mm_slli_epi16(xmm8, 2); - - rho_rpi_1_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_1_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_1_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_1_5 = _mm_subs_epi16(xmm4, xmm7); - rho_rpi_1_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_1_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); - rho_rpi_3_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_3_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_3_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_3_5 = _mm_subs_epi16(xmm4, xmm7); - rho_rpi_3_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_3_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); - xmm4 = _mm_slli_epi16(xmm4, 1); - rho_rpi_5_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_5_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_5_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_5_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_5_7 = _mm_adds_epi16(xmm4, xmm8); - rho_rmi_5_7 = _mm_subs_epi16(xmm4, xmm8); - - xmm4 = _mm_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); - xmm4 = _mm_slli_epi16(xmm4, 2); - rho_rpi_7_1 = _mm_adds_epi16(xmm4, xmm5); - rho_rmi_7_1 = _mm_subs_epi16(xmm4, xmm5); - rho_rpi_7_3 = _mm_adds_epi16(xmm4, xmm6); - rho_rmi_7_3 = _mm_subs_epi16(xmm4, xmm6); - rho_rpi_7_5 = _mm_adds_epi16(xmm4, xmm7); - rho_rmi_7_5 = _mm_subs_epi16(xmm4, xmm7); - - // Rearrange interfering MF output - xmm0 = stream1_128i_in[i]; - xmm1 = stream1_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y1r = _mm_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] - y1i = _mm_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] - - // Psi_r calculation from rho_rpi or rho_rmi - xmm0 = _mm_setzero_si128(); // ZERO for abs_pi16 - xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1r); - psi_r_p7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1r); - psi_r_p7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1r); - psi_r_p7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1r); - psi_r_p7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1r); - psi_r_p7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1r); - psi_r_p7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1r); - psi_r_p7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1r); - psi_r_p7_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1r); - psi_r_p5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1r); - psi_r_p5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1r); - psi_r_p5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1r); - psi_r_p5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1r); - psi_r_p5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1r); - psi_r_p5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1r); - psi_r_p5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1r); - psi_r_p5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1r); - psi_r_p3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1r); - psi_r_p3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1r); - psi_r_p3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1r); - psi_r_p3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1r); - psi_r_p3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1r); - psi_r_p3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1r); - psi_r_p3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1r); - psi_r_p3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1r); - psi_r_p1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1r); - psi_r_p1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1r); - psi_r_p1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1r); - psi_r_p1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1r); - psi_r_p1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1r); - psi_r_p1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1r); - psi_r_p1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1r); - psi_r_p1_m7 = _mm_abs_epi16(xmm2); - - xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1r); - psi_r_m1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1r); - psi_r_m1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1r); - psi_r_m1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1r); - psi_r_m1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1r); - psi_r_m1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1r); - psi_r_m1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1r); - psi_r_m1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1r); - psi_r_m1_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1r); - psi_r_m3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1r); - psi_r_m3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1r); - psi_r_m3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1r); - psi_r_m3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1r); - psi_r_m3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1r); - psi_r_m3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1r); - psi_r_m3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1r); - psi_r_m3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1r); - psi_r_m5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1r); - psi_r_m5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1r); - psi_r_m5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1r); - psi_r_m5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1r); - psi_r_m5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1r); - psi_r_m5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1r); - psi_r_m5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1r); - psi_r_m5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1r); - psi_r_m7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1r); - psi_r_m7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1r); - psi_r_m7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1r); - psi_r_m7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1r); - psi_r_m7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1r); - psi_r_m7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1r); - psi_r_m7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1r); - psi_r_m7_m7 = _mm_abs_epi16(xmm2); - - // Psi_i calculation from rho_rpi or rho_rmi - xmm2 = _mm_subs_epi16(rho_rmi_7_7, y1i); - psi_i_p7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_7, y1i); - psi_i_p7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_7, y1i); - psi_i_p7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_7, y1i); - psi_i_p7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_7, y1i); - psi_i_p7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_7, y1i); - psi_i_p7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_7, y1i); - psi_i_p7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_7, y1i); - psi_i_p7_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_5, y1i); - psi_i_p5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_5, y1i); - psi_i_p5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_5, y1i); - psi_i_p5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_5, y1i); - psi_i_p5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_5, y1i); - psi_i_p5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_5, y1i); - psi_i_p5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_5, y1i); - psi_i_p5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_5, y1i); - psi_i_p5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_3, y1i); - psi_i_p3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_3, y1i); - psi_i_p3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_3, y1i); - psi_i_p3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_3, y1i); - psi_i_p3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_3, y1i); - psi_i_p3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_3, y1i); - psi_i_p3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_3, y1i); - psi_i_p3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_3, y1i); - psi_i_p3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_7_1, y1i); - psi_i_p1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_5_1, y1i); - psi_i_p1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_3_1, y1i); - psi_i_p1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rmi_1_1, y1i); - psi_i_p1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_1_1, y1i); - psi_i_p1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_3_1, y1i); - psi_i_p1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_5_1, y1i); - psi_i_p1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rpi_7_1, y1i); - psi_i_p1_m7 = _mm_abs_epi16(xmm2); - - xmm2 = _mm_subs_epi16(rho_rpi_7_1, y1i); - psi_i_m1_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_1, y1i); - psi_i_m1_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_1, y1i); - psi_i_m1_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_1, y1i); - psi_i_m1_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_1, y1i); - psi_i_m1_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_1, y1i); - psi_i_m1_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_1, y1i); - psi_i_m1_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_1, y1i); - psi_i_m1_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_3, y1i); - psi_i_m3_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_3, y1i); - psi_i_m3_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_3, y1i); - psi_i_m3_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_3, y1i); - psi_i_m3_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_3, y1i); - psi_i_m3_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_3, y1i); - psi_i_m3_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_3, y1i); - psi_i_m3_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_3, y1i); - psi_i_m3_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_5, y1i); - psi_i_m5_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_5, y1i); - psi_i_m5_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_5, y1i); - psi_i_m5_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_5, y1i); - psi_i_m5_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_5, y1i); - psi_i_m5_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_5, y1i); - psi_i_m5_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_5, y1i); - psi_i_m5_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_5, y1i); - psi_i_m5_m7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_7_7, y1i); - psi_i_m7_p7 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_5_7, y1i); - psi_i_m7_p5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_3_7, y1i); - psi_i_m7_p3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_subs_epi16(rho_rpi_1_7, y1i); - psi_i_m7_p1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_1_7, y1i); - psi_i_m7_m1 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_3_7, y1i); - psi_i_m7_m3 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_5_7, y1i); - psi_i_m7_m5 = _mm_abs_epi16(xmm2); - xmm2 = _mm_adds_epi16(rho_rmi_7_7, y1i); - psi_i_m7_m7 = _mm_abs_epi16(xmm2); - - - // Rearrange desired MF output - xmm0 = stream0_128i_in[i]; - xmm1 = stream0_128i_in[i+1]; - xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); - //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] - //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] - y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] - y0i = _mm_unpackhi_epi64(xmm0,xmm1); - - // Rearrange desired channel magnitudes - xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) - xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); - - // Rearrange interfering channel magnitudes - xmm2 = ch_mag_128i_i[i]; - xmm3 = ch_mag_128i_i[i+1]; - xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); - ch_mag_int = _mm_unpacklo_epi64(xmm2,xmm3); - - y0r_one_over_sqrt_21 = _mm_mulhi_epi16(y0r, ONE_OVER_SQRT_42); - y0r_three_over_sqrt_21 = _mm_mulhi_epi16(y0r, THREE_OVER_SQRT_42); - y0r_five_over_sqrt_21 = _mm_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); - y0r_five_over_sqrt_21 = _mm_slli_epi16(y0r_five_over_sqrt_21, 1); - y0r_seven_over_sqrt_21 = _mm_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); - y0r_seven_over_sqrt_21 = _mm_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 - - y0i_one_over_sqrt_21 = _mm_mulhi_epi16(y0i, ONE_OVER_SQRT_42); - y0i_three_over_sqrt_21 = _mm_mulhi_epi16(y0i, THREE_OVER_SQRT_42); - y0i_five_over_sqrt_21 = _mm_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); - y0i_five_over_sqrt_21 = _mm_slli_epi16(y0i_five_over_sqrt_21, 1); - y0i_seven_over_sqrt_21 = _mm_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); - y0i_seven_over_sqrt_21 = _mm_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 - - y0_p_7_1 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_7_3 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_7_5 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_7_7 = _mm_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_5_1 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_5_3 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_5_5 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_5_7 = _mm_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_3_1 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_3_3 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_3_5 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_3_7 = _mm_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_p_1_1 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); - y0_p_1_3 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); - y0_p_1_5 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); - y0_p_1_7 = _mm_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); - - y0_m_1_1 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_1_3 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_1_5 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_1_7 = _mm_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_3_1 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_3_3 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_3_5 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_3_7 = _mm_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_5_1 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_5_3 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_5_5 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_5_7 = _mm_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); - y0_m_7_1 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); - y0_m_7_3 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); - y0_m_7_5 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); - y0_m_7_7 = _mm_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); - - // Detection of interference term - ch_mag_int_with_sigma2 = _mm_srai_epi16(ch_mag_int, 1); // *2 - two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 - three_ch_mag_int_with_sigma2 = _mm_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 - - interference_abs_64qam_epi16(psi_r_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_r_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - - interference_abs_64qam_epi16(psi_i_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - interference_abs_64qam_epi16(psi_i_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, - SEVEN_OVER_SQRT_2_42); - - // Calculation of a group of two terms in the bit metric involving product of psi and interference - prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); - prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); - prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); - prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); - prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); - prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); - prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); - prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); - prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); - prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); - prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); - prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); - prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); - prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); - prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); - prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); - prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); - prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); - prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); - prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); - prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); - prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); - prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); - prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); - prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); - prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); - prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); - prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); - prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); - prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); - prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); - prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); - prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); - prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); - prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); - prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); - prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); - prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); - prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); - prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); - prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); - prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); - prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); - prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); - prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); - prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); - prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); - prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); - prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); - prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); - prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); - prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); - prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); - prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); - prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); - prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); - prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); - prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); - prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); - prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); - prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); - prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); - prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); - prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); - - // Multiply by sqrt(2) - psi_a_p7_p7 = _mm_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2); - psi_a_p7_p7 = _mm_slli_epi16(psi_a_p7_p7, 2); - psi_a_p7_p5 = _mm_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2); - psi_a_p7_p5 = _mm_slli_epi16(psi_a_p7_p5, 2); - psi_a_p7_p3 = _mm_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2); - psi_a_p7_p3 = _mm_slli_epi16(psi_a_p7_p3, 2); - psi_a_p7_p1 = _mm_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2); - psi_a_p7_p1 = _mm_slli_epi16(psi_a_p7_p1, 2); - psi_a_p7_m1 = _mm_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2); - psi_a_p7_m1 = _mm_slli_epi16(psi_a_p7_m1, 2); - psi_a_p7_m3 = _mm_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2); - psi_a_p7_m3 = _mm_slli_epi16(psi_a_p7_m3, 2); - psi_a_p7_m5 = _mm_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2); - psi_a_p7_m5 = _mm_slli_epi16(psi_a_p7_m5, 2); - psi_a_p7_m7 = _mm_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2); - psi_a_p7_m7 = _mm_slli_epi16(psi_a_p7_m7, 2); - psi_a_p5_p7 = _mm_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2); - psi_a_p5_p7 = _mm_slli_epi16(psi_a_p5_p7, 2); - psi_a_p5_p5 = _mm_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2); - psi_a_p5_p5 = _mm_slli_epi16(psi_a_p5_p5, 2); - psi_a_p5_p3 = _mm_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2); - psi_a_p5_p3 = _mm_slli_epi16(psi_a_p5_p3, 2); - psi_a_p5_p1 = _mm_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2); - psi_a_p5_p1 = _mm_slli_epi16(psi_a_p5_p1, 2); - psi_a_p5_m1 = _mm_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2); - psi_a_p5_m1 = _mm_slli_epi16(psi_a_p5_m1, 2); - psi_a_p5_m3 = _mm_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2); - psi_a_p5_m3 = _mm_slli_epi16(psi_a_p5_m3, 2); - psi_a_p5_m5 = _mm_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2); - psi_a_p5_m5 = _mm_slli_epi16(psi_a_p5_m5, 2); - psi_a_p5_m7 = _mm_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2); - psi_a_p5_m7 = _mm_slli_epi16(psi_a_p5_m7, 2); - psi_a_p3_p7 = _mm_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2); - psi_a_p3_p7 = _mm_slli_epi16(psi_a_p3_p7, 2); - psi_a_p3_p5 = _mm_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2); - psi_a_p3_p5 = _mm_slli_epi16(psi_a_p3_p5, 2); - psi_a_p3_p3 = _mm_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2); - psi_a_p3_p3 = _mm_slli_epi16(psi_a_p3_p3, 2); - psi_a_p3_p1 = _mm_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2); - psi_a_p3_p1 = _mm_slli_epi16(psi_a_p3_p1, 2); - psi_a_p3_m1 = _mm_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2); - psi_a_p3_m1 = _mm_slli_epi16(psi_a_p3_m1, 2); - psi_a_p3_m3 = _mm_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2); - psi_a_p3_m3 = _mm_slli_epi16(psi_a_p3_m3, 2); - psi_a_p3_m5 = _mm_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2); - psi_a_p3_m5 = _mm_slli_epi16(psi_a_p3_m5, 2); - psi_a_p3_m7 = _mm_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2); - psi_a_p3_m7 = _mm_slli_epi16(psi_a_p3_m7, 2); - psi_a_p1_p7 = _mm_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2); - psi_a_p1_p7 = _mm_slli_epi16(psi_a_p1_p7, 2); - psi_a_p1_p5 = _mm_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2); - psi_a_p1_p5 = _mm_slli_epi16(psi_a_p1_p5, 2); - psi_a_p1_p3 = _mm_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2); - psi_a_p1_p3 = _mm_slli_epi16(psi_a_p1_p3, 2); - psi_a_p1_p1 = _mm_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); - psi_a_p1_p1 = _mm_slli_epi16(psi_a_p1_p1, 2); - psi_a_p1_m1 = _mm_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); - psi_a_p1_m1 = _mm_slli_epi16(psi_a_p1_m1, 2); - psi_a_p1_m3 = _mm_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2); - psi_a_p1_m3 = _mm_slli_epi16(psi_a_p1_m3, 2); - psi_a_p1_m5 = _mm_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2); - psi_a_p1_m5 = _mm_slli_epi16(psi_a_p1_m5, 2); - psi_a_p1_m7 = _mm_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2); - psi_a_p1_m7 = _mm_slli_epi16(psi_a_p1_m7, 2); - psi_a_m1_p7 = _mm_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2); - psi_a_m1_p7 = _mm_slli_epi16(psi_a_m1_p7, 2); - psi_a_m1_p5 = _mm_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2); - psi_a_m1_p5 = _mm_slli_epi16(psi_a_m1_p5, 2); - psi_a_m1_p3 = _mm_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2); - psi_a_m1_p3 = _mm_slli_epi16(psi_a_m1_p3, 2); - psi_a_m1_p1 = _mm_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); - psi_a_m1_p1 = _mm_slli_epi16(psi_a_m1_p1, 2); - psi_a_m1_m1 = _mm_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); - psi_a_m1_m1 = _mm_slli_epi16(psi_a_m1_m1, 2); - psi_a_m1_m3 = _mm_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2); - psi_a_m1_m3 = _mm_slli_epi16(psi_a_m1_m3, 2); - psi_a_m1_m5 = _mm_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2); - psi_a_m1_m5 = _mm_slli_epi16(psi_a_m1_m5, 2); - psi_a_m1_m7 = _mm_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2); - psi_a_m1_m7 = _mm_slli_epi16(psi_a_m1_m7, 2); - psi_a_m3_p7 = _mm_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2); - psi_a_m3_p7 = _mm_slli_epi16(psi_a_m3_p7, 2); - psi_a_m3_p5 = _mm_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2); - psi_a_m3_p5 = _mm_slli_epi16(psi_a_m3_p5, 2); - psi_a_m3_p3 = _mm_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2); - psi_a_m3_p3 = _mm_slli_epi16(psi_a_m3_p3, 2); - psi_a_m3_p1 = _mm_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2); - psi_a_m3_p1 = _mm_slli_epi16(psi_a_m3_p1, 2); - psi_a_m3_m1 = _mm_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2); - psi_a_m3_m1 = _mm_slli_epi16(psi_a_m3_m1, 2); - psi_a_m3_m3 = _mm_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2); - psi_a_m3_m3 = _mm_slli_epi16(psi_a_m3_m3, 2); - psi_a_m3_m5 = _mm_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2); - psi_a_m3_m5 = _mm_slli_epi16(psi_a_m3_m5, 2); - psi_a_m3_m7 = _mm_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2); - psi_a_m3_m7 = _mm_slli_epi16(psi_a_m3_m7, 2); - psi_a_m5_p7 = _mm_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2); - psi_a_m5_p7 = _mm_slli_epi16(psi_a_m5_p7, 2); - psi_a_m5_p5 = _mm_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2); - psi_a_m5_p5 = _mm_slli_epi16(psi_a_m5_p5, 2); - psi_a_m5_p3 = _mm_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2); - psi_a_m5_p3 = _mm_slli_epi16(psi_a_m5_p3, 2); - psi_a_m5_p1 = _mm_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2); - psi_a_m5_p1 = _mm_slli_epi16(psi_a_m5_p1, 2); - psi_a_m5_m1 = _mm_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2); - psi_a_m5_m1 = _mm_slli_epi16(psi_a_m5_m1, 2); - psi_a_m5_m3 = _mm_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2); - psi_a_m5_m3 = _mm_slli_epi16(psi_a_m5_m3, 2); - psi_a_m5_m5 = _mm_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2); - psi_a_m5_m5 = _mm_slli_epi16(psi_a_m5_m5, 2); - psi_a_m5_m7 = _mm_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2); - psi_a_m5_m7 = _mm_slli_epi16(psi_a_m5_m7, 2); - psi_a_m7_p7 = _mm_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2); - psi_a_m7_p7 = _mm_slli_epi16(psi_a_m7_p7, 2); - psi_a_m7_p5 = _mm_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2); - psi_a_m7_p5 = _mm_slli_epi16(psi_a_m7_p5, 2); - psi_a_m7_p3 = _mm_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2); - psi_a_m7_p3 = _mm_slli_epi16(psi_a_m7_p3, 2); - psi_a_m7_p1 = _mm_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2); - psi_a_m7_p1 = _mm_slli_epi16(psi_a_m7_p1, 2); - psi_a_m7_m1 = _mm_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2); - psi_a_m7_m1 = _mm_slli_epi16(psi_a_m7_m1, 2); - psi_a_m7_m3 = _mm_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2); - psi_a_m7_m3 = _mm_slli_epi16(psi_a_m7_m3, 2); - psi_a_m7_m5 = _mm_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2); - psi_a_m7_m5 = _mm_slli_epi16(psi_a_m7_m5, 2); - psi_a_m7_m7 = _mm_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2); - psi_a_m7_m7 = _mm_slli_epi16(psi_a_m7_m7, 2); - - // Calculation of a group of two terms in the bit metric involving squares of interference - square_a_64qam_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p7); - square_a_64qam_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p5); - square_a_64qam_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p3); - square_a_64qam_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p1); - square_a_64qam_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m1); - square_a_64qam_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m3); - square_a_64qam_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m5); - square_a_64qam_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m7); - square_a_64qam_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p7); - square_a_64qam_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p5); - square_a_64qam_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p3); - square_a_64qam_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p1); - square_a_64qam_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m1); - square_a_64qam_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m3); - square_a_64qam_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m5); - square_a_64qam_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m7); - square_a_64qam_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p7); - square_a_64qam_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p5); - square_a_64qam_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p3); - square_a_64qam_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p1); - square_a_64qam_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m1); - square_a_64qam_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m3); - square_a_64qam_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m5); - square_a_64qam_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m7); - square_a_64qam_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p7); - square_a_64qam_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p5); - square_a_64qam_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p3); - square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1); - square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1); - square_a_64qam_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m3); - square_a_64qam_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m5); - square_a_64qam_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m7); - square_a_64qam_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p7); - square_a_64qam_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p5); - square_a_64qam_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p3); - square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1); - square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1); - square_a_64qam_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m3); - square_a_64qam_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m5); - square_a_64qam_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m7); - square_a_64qam_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p7); - square_a_64qam_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p5); - square_a_64qam_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p3); - square_a_64qam_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p1); - square_a_64qam_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m1); - square_a_64qam_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m3); - square_a_64qam_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m5); - square_a_64qam_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m7); - square_a_64qam_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p7); - square_a_64qam_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p5); - square_a_64qam_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p3); - square_a_64qam_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p1); - square_a_64qam_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m1); - square_a_64qam_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m3); - square_a_64qam_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m5); - square_a_64qam_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m7); - square_a_64qam_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p7); - square_a_64qam_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p5); - square_a_64qam_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p3); - square_a_64qam_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p1); - square_a_64qam_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m1); - square_a_64qam_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m3); - square_a_64qam_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m5); - square_a_64qam_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m7); - - // Computing different multiples of ||h0||^2 - // x=1, y=1 - ch_mag_2_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); - ch_mag_2_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_2_over_42_with_sigma2,1); - // x=1, y=3 - ch_mag_10_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); - ch_mag_10_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_10_over_42_with_sigma2,1); - // x=1, x=5 - ch_mag_26_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); - ch_mag_26_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_26_over_42_with_sigma2,1); - // x=1, y=7 - ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); - ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); - // x=3, y=3 - ch_mag_18_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); - ch_mag_18_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_18_over_42_with_sigma2,1); - // x=3, y=5 - ch_mag_34_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); - ch_mag_34_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_34_over_42_with_sigma2,1); - // x=3, y=7 - ch_mag_58_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); - ch_mag_58_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_58_over_42_with_sigma2,2); - // x=5, y=5 - ch_mag_50_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); - ch_mag_50_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_50_over_42_with_sigma2,1); - // x=5, y=7 - ch_mag_74_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); - ch_mag_74_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_74_over_42_with_sigma2,2); - // x=7, y=7 - ch_mag_98_over_42_with_sigma2 = _mm_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); - ch_mag_98_over_42_with_sigma2 = _mm_slli_epi16(ch_mag_98_over_42_with_sigma2,2); - - // Computing Metrics - xmm0 = _mm_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); - xmm1 = _mm_adds_epi16(xmm0, y0_p_7_7); - bit_met_p7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); - xmm1 = _mm_adds_epi16(xmm0, y0_p_7_5); - bit_met_p7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); - xmm1 = _mm_adds_epi16(xmm0, y0_p_7_3); - bit_met_p7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); - xmm1 = _mm_adds_epi16(xmm0, y0_p_7_1); - bit_met_p7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); - xmm1 = _mm_adds_epi16(xmm0, y0_m_7_1); - bit_met_p7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); - xmm1 = _mm_adds_epi16(xmm0, y0_m_7_3); - bit_met_p7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); - xmm1 = _mm_adds_epi16(xmm0, y0_m_7_5); - bit_met_p7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); - xmm1 = _mm_adds_epi16(xmm0, y0_m_7_7); - bit_met_p7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); - xmm1 = _mm_adds_epi16(xmm0, y0_p_5_7); - bit_met_p5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); - xmm1 = _mm_adds_epi16(xmm0, y0_p_5_5); - bit_met_p5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); - xmm1 = _mm_adds_epi16(xmm0, y0_p_5_3); - bit_met_p5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); - xmm1 = _mm_adds_epi16(xmm0, y0_p_5_1); - bit_met_p5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); - xmm1 = _mm_adds_epi16(xmm0, y0_m_5_1); - bit_met_p5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); - xmm1 = _mm_adds_epi16(xmm0, y0_m_5_3); - bit_met_p5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); - xmm1 = _mm_adds_epi16(xmm0, y0_m_5_5); - bit_met_p5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); - xmm1 = _mm_adds_epi16(xmm0, y0_m_5_7); - bit_met_p5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); - xmm1 = _mm_adds_epi16(xmm0, y0_p_3_7); - bit_met_p3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); - xmm1 = _mm_adds_epi16(xmm0, y0_p_3_5); - bit_met_p3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); - xmm1 = _mm_adds_epi16(xmm0, y0_p_3_3); - bit_met_p3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); - xmm1 = _mm_adds_epi16(xmm0, y0_p_3_1); - bit_met_p3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); - xmm1 = _mm_adds_epi16(xmm0, y0_m_3_1); - bit_met_p3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); - xmm1 = _mm_adds_epi16(xmm0, y0_m_3_3); - bit_met_p3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); - xmm1 = _mm_adds_epi16(xmm0, y0_m_3_5); - bit_met_p3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); - xmm1 = _mm_adds_epi16(xmm0, y0_m_3_7); - bit_met_p3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); - xmm1 = _mm_adds_epi16(xmm0, y0_p_1_7); - bit_met_p1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); - xmm1 = _mm_adds_epi16(xmm0, y0_p_1_5); - bit_met_p1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); - xmm1 = _mm_adds_epi16(xmm0, y0_p_1_3); - bit_met_p1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); - xmm1 = _mm_adds_epi16(xmm0, y0_p_1_1); - bit_met_p1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); - xmm1 = _mm_adds_epi16(xmm0, y0_m_1_1); - bit_met_p1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); - xmm1 = _mm_adds_epi16(xmm0, y0_m_1_3); - bit_met_p1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); - xmm1 = _mm_adds_epi16(xmm0, y0_m_1_5); - bit_met_p1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); - xmm1 = _mm_adds_epi16(xmm0, y0_m_1_7); - bit_met_p1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - - xmm0 = _mm_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); - xmm1 = _mm_subs_epi16(xmm0, y0_m_1_7); - bit_met_m1_p7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); - xmm1 = _mm_subs_epi16(xmm0, y0_m_1_5); - bit_met_m1_p5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); - xmm1 = _mm_subs_epi16(xmm0, y0_m_1_3); - bit_met_m1_p3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); - xmm1 = _mm_subs_epi16(xmm0, y0_m_1_1); - bit_met_m1_p1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); - xmm1 = _mm_subs_epi16(xmm0, y0_p_1_1); - bit_met_m1_m1 = _mm_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); - xmm1 = _mm_subs_epi16(xmm0, y0_p_1_3); - bit_met_m1_m3 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); - xmm1 = _mm_subs_epi16(xmm0, y0_p_1_5); - bit_met_m1_m5 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); - xmm1 = _mm_subs_epi16(xmm0, y0_p_1_7); - bit_met_m1_m7 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); - xmm1 = _mm_subs_epi16(xmm0, y0_m_3_7); - bit_met_m3_p7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); - xmm1 = _mm_subs_epi16(xmm0, y0_m_3_5); - bit_met_m3_p5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); - xmm1 = _mm_subs_epi16(xmm0, y0_m_3_3); - bit_met_m3_p3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); - xmm1 = _mm_subs_epi16(xmm0, y0_m_3_1); - bit_met_m3_p1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); - xmm1 = _mm_subs_epi16(xmm0, y0_p_3_1); - bit_met_m3_m1 = _mm_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); - xmm1 = _mm_subs_epi16(xmm0, y0_p_3_3); - bit_met_m3_m3 = _mm_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); - xmm1 = _mm_subs_epi16(xmm0, y0_p_3_5); - bit_met_m3_m5 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); - xmm1 = _mm_subs_epi16(xmm0, y0_p_3_7); - bit_met_m3_m7 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); - xmm1 = _mm_subs_epi16(xmm0, y0_m_5_7); - bit_met_m5_p7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); - xmm1 = _mm_subs_epi16(xmm0, y0_m_5_5); - bit_met_m5_p5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); - xmm1 = _mm_subs_epi16(xmm0, y0_m_5_3); - bit_met_m5_p3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); - xmm1 = _mm_subs_epi16(xmm0, y0_m_5_1); - bit_met_m5_p1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); - xmm1 = _mm_subs_epi16(xmm0, y0_p_5_1); - bit_met_m5_m1 = _mm_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); - xmm1 = _mm_subs_epi16(xmm0, y0_p_5_3); - bit_met_m5_m3 = _mm_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); - xmm1 = _mm_subs_epi16(xmm0, y0_p_5_5); - bit_met_m5_m5 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); - xmm1 = _mm_subs_epi16(xmm0, y0_p_5_7); - bit_met_m5_m7 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); - xmm1 = _mm_subs_epi16(xmm0, y0_m_7_7); - bit_met_m7_p7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); - xmm1 = _mm_subs_epi16(xmm0, y0_m_7_5); - bit_met_m7_p5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); - xmm1 = _mm_subs_epi16(xmm0, y0_m_7_3); - bit_met_m7_p3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); - xmm1 = _mm_subs_epi16(xmm0, y0_m_7_1); - bit_met_m7_p1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); - xmm1 = _mm_subs_epi16(xmm0, y0_p_7_1); - bit_met_m7_m1 = _mm_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); - xmm1 = _mm_subs_epi16(xmm0, y0_p_7_3); - bit_met_m7_m3 = _mm_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); - xmm1 = _mm_subs_epi16(xmm0, y0_p_7_5); - bit_met_m7_m5 = _mm_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); - xmm0 = _mm_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); - xmm1 = _mm_subs_epi16(xmm0, y0_p_7_7); - bit_met_m7_m7 = _mm_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); - - // Detection for 1st bit (LTE mapping) - // bit = 1 - xmm0 = _mm_max_epi16(bit_met_m7_p7, bit_met_m7_p5); - xmm1 = _mm_max_epi16(bit_met_m7_p3, bit_met_m7_p1); - xmm2 = _mm_max_epi16(bit_met_m7_m1, bit_met_m7_m3); - xmm3 = _mm_max_epi16(bit_met_m7_m5, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m5_p7, bit_met_m5_p5); - xmm1 = _mm_max_epi16(bit_met_m5_p3, bit_met_m5_p1); - xmm2 = _mm_max_epi16(bit_met_m5_m1, bit_met_m5_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m5_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_m3_p7, bit_met_m3_p5); - xmm1 = _mm_max_epi16(bit_met_m3_p3, bit_met_m3_p1); - xmm2 = _mm_max_epi16(bit_met_m3_m1, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m3_m5, bit_met_m3_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_p7, bit_met_m1_p5); - xmm1 = _mm_max_epi16(bit_met_m1_p3, bit_met_m1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m1_m3); - xmm3 = _mm_max_epi16(bit_met_m1_m5, bit_met_m1_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - // bit = 0 - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p7_p5); - xmm1 = _mm_max_epi16(bit_met_p7_p3, bit_met_p7_p1); - xmm2 = _mm_max_epi16(bit_met_p7_m1, bit_met_p7_m3); - xmm3 = _mm_max_epi16(bit_met_p7_m5, bit_met_p7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_p7, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p5_p3, bit_met_p5_p1); - xmm2 = _mm_max_epi16(bit_met_p5_m1, bit_met_p5_m3); - xmm3 = _mm_max_epi16(bit_met_p5_m5, bit_met_p5_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_p7, bit_met_p3_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p3_p1); - xmm2 = _mm_max_epi16(bit_met_p3_m1, bit_met_p3_m3); - xmm3 = _mm_max_epi16(bit_met_p3_m5, bit_met_p3_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_p7, bit_met_p1_p5); - xmm1 = _mm_max_epi16(bit_met_p1_p3, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_p1_m1, bit_met_p1_m3); - xmm3 = _mm_max_epi16(bit_met_p1_m5, bit_met_p1_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y0r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 2nd bit (LTE mapping) - // bit = 1 - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - // bit = 0 - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y1r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 3rd bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); - xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); - xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); - xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); - xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); - xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); - xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); - xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); - xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); - xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); - xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); - xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); - xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); - xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); - xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y2r = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 4th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m7_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y0i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - - // Detection for 5th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_m7_m7, bit_met_m7_m5); - xmm1 = _mm_max_epi16(bit_met_m7_m3, bit_met_m7_m1); - xmm2 = _mm_max_epi16(bit_met_m7_p1, bit_met_m7_p3); - xmm3 = _mm_max_epi16(bit_met_m7_p5, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m1_m7, bit_met_m1_m5); - xmm1 = _mm_max_epi16(bit_met_m1_m3, bit_met_m1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m1_p3); - xmm3 = _mm_max_epi16(bit_met_m1_p5, bit_met_m1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p1_m7, bit_met_p1_m5); - xmm1 = _mm_max_epi16(bit_met_p1_m3, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_p1_p1, bit_met_p1_p3); - xmm3 = _mm_max_epi16(bit_met_p1_p5, bit_met_p1_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p7_m5); - xmm1 = _mm_max_epi16(bit_met_p7_m3, bit_met_p7_m1); - xmm2 = _mm_max_epi16(bit_met_p7_p1, bit_met_p7_p3); - xmm3 = _mm_max_epi16(bit_met_p7_p5, bit_met_p7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_m5_m7, bit_met_m5_m5); - xmm1 = _mm_max_epi16(bit_met_m5_m3, bit_met_m5_m1); - xmm2 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_m3_m7, bit_met_m3_m5); - xmm1 = _mm_max_epi16(bit_met_m3_m3, bit_met_m3_m1); - xmm2 = _mm_max_epi16(bit_met_m3_p1, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m3_p5, bit_met_m3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p3_m7, bit_met_p3_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p3_m1); - xmm2 = _mm_max_epi16(bit_met_p3_p1, bit_met_p3_p3); - xmm3 = _mm_max_epi16(bit_met_p3_p5, bit_met_p3_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p5_m7, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p5_m3, bit_met_p5_m1); - xmm2 = _mm_max_epi16(bit_met_p5_p1, bit_met_p5_p3); - xmm3 = _mm_max_epi16(bit_met_p5_p5, bit_met_p5_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y1i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - // Detection for 6th bit (LTE mapping) - xmm0 = _mm_max_epi16(bit_met_p7_p7, bit_met_p5_p7); - xmm1 = _mm_max_epi16(bit_met_p3_p7, bit_met_p1_p7); - xmm2 = _mm_max_epi16(bit_met_m1_p7, bit_met_m3_p7); - xmm3 = _mm_max_epi16(bit_met_m5_p7, bit_met_m7_p7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p1, bit_met_p5_p1); - xmm1 = _mm_max_epi16(bit_met_p3_p1, bit_met_p1_p1); - xmm2 = _mm_max_epi16(bit_met_m1_p1, bit_met_m3_p1); - xmm3 = _mm_max_epi16(bit_met_m5_p1, bit_met_m5_p1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m1, bit_met_p5_m1); - xmm1 = _mm_max_epi16(bit_met_p3_m1, bit_met_p1_m1); - xmm2 = _mm_max_epi16(bit_met_m1_m1, bit_met_m3_m1); - xmm3 = _mm_max_epi16(bit_met_m5_m1, bit_met_m7_m1); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m7, bit_met_p5_m7); - xmm1 = _mm_max_epi16(bit_met_p3_m7, bit_met_p1_m7); - xmm2 = _mm_max_epi16(bit_met_m1_m7, bit_met_m3_m7); - xmm3 = _mm_max_epi16(bit_met_m5_m7, bit_met_m7_m7); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm4); - logmax_den_re0 = _mm_max_epi16(logmax_den_re0, xmm5); - - xmm0 = _mm_max_epi16(bit_met_p7_m5, bit_met_p5_m5); - xmm1 = _mm_max_epi16(bit_met_p3_m5, bit_met_p1_m5); - xmm2 = _mm_max_epi16(bit_met_m1_m5, bit_met_m3_m5); - xmm3 = _mm_max_epi16(bit_met_m5_m5, bit_met_m7_m5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(xmm4, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_m3, bit_met_p5_m3); - xmm1 = _mm_max_epi16(bit_met_p3_m3, bit_met_p1_m3); - xmm2 = _mm_max_epi16(bit_met_m1_m3, bit_met_m3_m3); - xmm3 = _mm_max_epi16(bit_met_m5_m3, bit_met_m7_m3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p3, bit_met_p5_p3); - xmm1 = _mm_max_epi16(bit_met_p3_p3, bit_met_p1_p3); - xmm2 = _mm_max_epi16(bit_met_m1_p3, bit_met_m3_p3); - xmm3 = _mm_max_epi16(bit_met_m5_p3, bit_met_m7_p3); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - xmm0 = _mm_max_epi16(bit_met_p7_p5, bit_met_p5_p5); - xmm1 = _mm_max_epi16(bit_met_p3_p5, bit_met_p1_p5); - xmm2 = _mm_max_epi16(bit_met_m1_p5, bit_met_m3_p5); - xmm3 = _mm_max_epi16(bit_met_m5_p5, bit_met_m7_p5); - xmm4 = _mm_max_epi16(xmm0, xmm1); - xmm5 = _mm_max_epi16(xmm2, xmm3); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm4); - logmax_num_re0 = _mm_max_epi16(logmax_num_re0, xmm5); - - y2i = _mm_subs_epi16(logmax_num_re0, logmax_den_re0); - - - // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs - // RE 1 - j = 24*i; - stream0_out[j + 0] = ((short *)&y0r)[0]; - stream0_out[j + 1] = ((short *)&y1r)[0]; - stream0_out[j + 2] = ((short *)&y2r)[0]; - stream0_out[j + 3] = ((short *)&y0i)[0]; - stream0_out[j + 4] = ((short *)&y1i)[0]; - stream0_out[j + 5] = ((short *)&y2i)[0]; - // RE 2 - stream0_out[j + 6] = ((short *)&y0r)[1]; - stream0_out[j + 7] = ((short *)&y1r)[1]; - stream0_out[j + 8] = ((short *)&y2r)[1]; - stream0_out[j + 9] = ((short *)&y0i)[1]; - stream0_out[j + 10] = ((short *)&y1i)[1]; - stream0_out[j + 11] = ((short *)&y2i)[1]; - // RE 3 - stream0_out[j + 12] = ((short *)&y0r)[2]; - stream0_out[j + 13] = ((short *)&y1r)[2]; - stream0_out[j + 14] = ((short *)&y2r)[2]; - stream0_out[j + 15] = ((short *)&y0i)[2]; - stream0_out[j + 16] = ((short *)&y1i)[2]; - stream0_out[j + 17] = ((short *)&y2i)[2]; - // RE 4 - stream0_out[j + 18] = ((short *)&y0r)[3]; - stream0_out[j + 19] = ((short *)&y1r)[3]; - stream0_out[j + 20] = ((short *)&y2r)[3]; - stream0_out[j + 21] = ((short *)&y0i)[3]; - stream0_out[j + 22] = ((short *)&y1i)[3]; - stream0_out[j + 23] = ((short *)&y2i)[3]; - // RE 5 - stream0_out[j + 24] = ((short *)&y0r)[4]; - stream0_out[j + 25] = ((short *)&y1r)[4]; - stream0_out[j + 26] = ((short *)&y2r)[4]; - stream0_out[j + 27] = ((short *)&y0i)[4]; - stream0_out[j + 28] = ((short *)&y1i)[4]; - stream0_out[j + 29] = ((short *)&y2i)[4]; - // RE 6 - stream0_out[j + 30] = ((short *)&y0r)[5]; - stream0_out[j + 31] = ((short *)&y1r)[5]; - stream0_out[j + 32] = ((short *)&y2r)[5]; - stream0_out[j + 33] = ((short *)&y0i)[5]; - stream0_out[j + 34] = ((short *)&y1i)[5]; - stream0_out[j + 35] = ((short *)&y2i)[5]; - // RE 7 - stream0_out[j + 36] = ((short *)&y0r)[6]; - stream0_out[j + 37] = ((short *)&y1r)[6]; - stream0_out[j + 38] = ((short *)&y2r)[6]; - stream0_out[j + 39] = ((short *)&y0i)[6]; - stream0_out[j + 40] = ((short *)&y1i)[6]; - stream0_out[j + 41] = ((short *)&y2i)[6]; - // RE 8 - stream0_out[j + 42] = ((short *)&y0r)[7]; - stream0_out[j + 43] = ((short *)&y1r)[7]; - stream0_out[j + 44] = ((short *)&y2r)[7]; - stream0_out[j + 45] = ((short *)&y0i)[7]; - stream0_out[j + 46] = ((short *)&y1i)[7]; - stream0_out[j + 47] = ((short *)&y2i)[7]; - -#elif defined(__arm__) || defined(__aarch64__) - -#endif - - } - -#if defined(__x86_64__) || defined(__i386__) - _mm_empty(); - _m_empty(); -#endif -} -#endif - - -int nr_dlsch_64qam_64qam_llr(NR_DL_FRAME_PARMS *frame_parms, - int32_t **rxdataF_comp, - int32_t **rxdataF_comp_i, - int32_t **dl_ch_mag, - int32_t **dl_ch_mag_i, - int32_t **rho_i, - int16_t *dlsch_llr, - uint8_t symbol, - uint32_t len, - uint8_t first_symbol_flag, - uint16_t nb_rb, - uint16_t pbch_pss_sss_adjust, - //int16_t **llr16p, - uint32_t llr_offset) -{ - - int16_t *rxF = (int16_t*)&rxdataF_comp[0][(symbol*nb_rb*12)]; - int16_t *rxF_i = (int16_t*)&rxdataF_comp_i[0][(symbol*nb_rb*12)]; - int16_t *ch_mag = (int16_t*)&dl_ch_mag[0][(symbol*nb_rb*12)]; - int16_t *ch_mag_i = (int16_t*)&dl_ch_mag_i[0][(symbol*nb_rb*12)]; - int16_t *rho = (int16_t*)&rho_i[0][(symbol*nb_rb*12)]; - int16_t *llr16; - int8_t *pllr_symbol; // pointer where llrs should filled for this ofdm symbol - - //first symbol has different structure due to more pilots - /*if (first_symbol_flag == 1) { - llr16 = (int16_t*)dlsch_llr; - } else { - llr16 = (int16_t*)(*llr16p); - }*/ - - llr16 = (int16_t*)dlsch_llr; - - AssertFatal(llr16!=NULL,"nr_dlsch_16qam_64qam_llr:llr is null, symbol %d\n",symbol); - - - pllr_symbol = (int8_t*)dlsch_llr; - pllr_symbol += llr_offset; - //printf("nr_dlsch_64qam_64qam_llr: symbol %d,nb_rb %d, len %d,pbch_pss_sss_adjust %d\n",symbol,nb_rb,len,pbch_pss_sss_adjust); - /*LOG_I(PHY,"nr_dlsch_64qam_64qam_llr [symb %d / FirstSym %d / Length %d / LLR Offset %d]: @LLR Buff %x, @LLR Buff(symb) %x, , @Compute LLR Buff(symb) %x \n", - symbol, - first_symbol_flag, - len, - llr_offset, - (int16_t*)dlsch_llr, - llr16, - pllr_symbol);*/ - - // Round length up to multiple of 16 words - uint32_t len256i = ((len+16)>>4)*16; - int32_t *rxF_256i = (int32_t*) malloc16_clear(len256i*4); - int32_t *rxF_i_256i = (int32_t*) malloc16_clear(len256i*4); - int32_t *ch_mag_256i = (int32_t*) malloc16_clear(len256i*4); - int32_t *ch_mag_i_256i = (int32_t*) malloc16_clear(len256i*4); - int32_t *rho_256i = (int32_t*) malloc16_clear(len256i*4); - - memcpy(rxF_256i, rxF, len*4); - memcpy(rxF_i_256i, rxF_i, len*4); - memcpy(ch_mag_256i, ch_mag, len*4); - memcpy(ch_mag_i_256i, ch_mag_i, len*4); - memcpy(rho_256i, rho, len*4); - -#if 0 - qam64_qam16_avx2((short *)rxF_256i, - (short *)rxF_i_256i, - (short *)ch_mag_256i, - (short *)ch_mag_i_256i, - (short *)llr16, - (short *) rho_256i, - len); -#else - qam64_qam64_avx2((int32_t *)rxF_256i, - (int32_t *)rxF_i_256i, - (int32_t *)ch_mag_256i, - (int32_t *)ch_mag_i_256i, - (int16_t *)llr16, - (int32_t *) rho_256i, - len); -#endif - - free16(rxF_256i, sizeof(rxF_256i)); - free16(rxF_i_256i, sizeof(rxF_i_256i)); - free16(ch_mag_256i, sizeof(ch_mag_256i)); - free16(ch_mag_i_256i, sizeof(ch_mag_i_256i)); - free16(rho_256i, sizeof(rho_256i)); - - llr16 += (6*len); - //*llr16p = (short *)llr16; - - return(0); -} diff --git a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h index c4dc766f94a..7432c82aac9 100644 --- a/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h +++ b/openair1/PHY/NR_UE_TRANSPORT/nr_transport_proto_ue.h @@ -87,216 +87,6 @@ int32_t nr_dlsch_qpsk_qpsk_llr(NR_DL_FRAME_PARMS *frame_parms, uint16_t pbch_pss_sss_adj, int16_t **llr128p); -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream QPSK/16QAM reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void nr_qpsk_qam16(int16_t *stream0_in, - int16_t *stream1_in, - short *ch_mag_i, - int16_t *stream0_out, - int16_t *rho01, - int32_t length); - -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream QPSK/64QAM reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void nr_qpsk_qam64(int16_t *stream0_in, - int16_t *stream1_in, - short *ch_mag_i, - int16_t *stream0_out, - int16_t *rho01, - int32_t length); - -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 16QAM/QPSK reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void nr_qam16_qpsk(short *stream0_in, short *stream1_in, short *ch_mag, short *stream0_out, short *rho01, int length); - -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 16QAM/16QAM reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void nr_qam16_qam16(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *ch_mag_i, - short *stream0_out, - short *rho01, - int length); - -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/64QAM reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void nr_qam64_qpsk(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *stream0_out, - short *rho01, - int length); - -/** \brief This function perform LLR computation for dual-stream (64QAM/64QAM) transmission. - @param frame_parms Frame descriptor structure - @param rxdataF_comp Compensated channel output - @param rxdataF_comp_i Compensated channel output for interference - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param rho_i Correlation between channel of signal and inteference - @param dlsch_llr llr output - @param symbol OFDM symbol index in sub-frame - @param first_symbol_flag flag to indicate this is the first symbol of the dlsch - @param nb_rb number of RBs for this allocation - @param pbch_pss_sss_adj Number of channel bits taken by PBCH/PSS/SSS - @param llr16p pointer to pointer to symbol in dlsch_llr*/ -int nr_dlsch_64qam_qpsk_llr(NR_DL_FRAME_PARMS *frame_parms, - int **rxdataF_comp, - int **rxdataF_comp_i, - int **dl_ch_mag, - int **rho_i, - short *dlsch_llr, - unsigned char symbol, - unsigned char first_symbol_flag, - unsigned short nb_rb, - uint16_t pbch_pss_sss_adjust, - short **llr16p); - -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/16QAM reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void nr_qam64_qam16(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *ch_mag_i, - short *stream0_out, - short *rho01, - int length); - -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/16QAM reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void qam64_qam16_avx2(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *ch_mag_i, - short *stream0_out, - short *rho01, - int length); - -/** \brief This function perform LLR computation for dual-stream (64QAM/16QAM) transmission. - @param frame_parms Frame descriptor structure - @param rxdataF_comp Compensated channel output - @param rxdataF_comp_i Compensated channel output for interference - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param rho_i Correlation between channel of signal and inteference - @param dlsch_llr llr output - @param symbol OFDM symbol index in sub-frame - @param first_symbol_flag flag to indicate this is the first symbol of the dlsch - @param nb_rb number of RBs for this allocation - @param pbch_pss_sss_adj Number of channel bits taken by PBCH/PSS/SSS - @param llr16p pointer to pointer to symbol in dlsch_llr*/ -int nr_dlsch_64qam_16qam_llr(NR_DL_FRAME_PARMS *frame_parms, - int **rxdataF_comp, - int **rxdataF_comp_i, - int **dl_ch_mag, - int **dl_ch_mag_i, - int **rho_i, - short *dlsch_llr, - unsigned char symbol, - unsigned char first_symbol_flag, - unsigned short nb_rb, - uint16_t pbch_pss_sss_adjust, - short **llr16p); - -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/64QAM reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void qam64_qam64(short *stream0_in, - short *stream1_in, - short *ch_mag, - short *ch_mag_i, - short *stream0_out, - short *rho01, - int length); - -/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/64QAM reception. - @param stream0_in Input from channel compensated (MR combined) stream 0 - @param stream1_in Input from channel compensated (MR combined) stream 1 - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param stream0_out Output from LLR unit for stream0 - @param rho01 Cross-correlation between channels (MR combined) - @param length in complex channel outputs*/ -void qam64_qam64_avx2(int32_t *stream0_in, - int32_t *stream1_in, - int32_t *ch_mag, - int32_t *ch_mag_i, - int16_t *stream0_out, - int32_t *rho01, - int length); - -/** \brief This function perform LLR computation for dual-stream (64QAM/64QAM) transmission. - @param frame_parms Frame descriptor structure - @param rxdataF_comp Compensated channel output - @param rxdataF_comp_i Compensated channel output for interference - @param ch_mag Input from scaled channel magnitude square of h0'*g0 - @param ch_mag_i Input from scaled channel magnitude square of h0'*g1 - @param rho_i Correlation between channel of signal and inteference - @param dlsch_llr llr output - @param symbol OFDM symbol index in sub-frame - @param first_symbol_flag flag to indicate this is the first symbol of the dlsch - @param nb_rb number of RBs for this allocation - @param pbch_pss_sss_adj Number of channel bits taken by PBCH/PSS/SSS - @param llr16p pointer to pointer to symbol in dlsch_llr*/ -int nr_dlsch_64qam_64qam_llr(NR_DL_FRAME_PARMS *frame_parms, - int **rxdataF_comp, - int **rxdataF_comp_i, - int **dl_ch_mag, - int **dl_ch_mag_i, - int **rho_i, - short *dlsch_llr, - unsigned char symbol, - uint32_t len, - unsigned char first_symbol_flag, - unsigned short nb_rb, - uint16_t pbch_pss_sss_adjust, - //short **llr16p, - uint32_t llr_offset); - - /** \brief This function generates log-likelihood ratios (decoder input) for single-stream QPSK received waveforms. @param frame_parms Frame descriptor structure @param rxdataF_comp Compensated channel output @@ -605,6 +395,7 @@ int nr_rx_pdsch(PHY_VARS_NR_UE *ue, int32_t generate_nr_prach(PHY_VARS_NR_UE *ue, uint8_t gNB_id, int frame, uint8_t slot); void dump_nrdlsch(PHY_VARS_NR_UE *ue,uint8_t gNB_id,uint8_t nr_slot_rx,unsigned int *coded_bits_per_codeword,int round, unsigned char harq_pid); +void nr_a_sum_b(c16_t *input_x, c16_t *input_y, unsigned short nb_rb); /**@}*/ #endif -- GitLab