diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c index c33dec87d3b41f2ed6afc2d8df5e583f3ca181da..240758ecebbc97a42dc6d1c4331b24848dbca343 100644 --- a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation.c @@ -8831,15 +8831,8 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, len = (nb_rb*12) - pbch_pss_sss_adjust; } -#if 0 - qam64_qam64((short *)rxF, - (short *)rxF_i, - (short *)ch_mag, - (short *)ch_mag_i, - (short *)llr16, - (short *)rho, - len); -#else +//#ifdef __AVX2__ + // Round length up to multiple of 16 words uint32_t len256i = ((len+16)>>4)*16; int32_t *rxF_256i = (int32_t*) malloc16_clear(len256i*4); @@ -8853,7 +8846,7 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, memcpy(ch_mag_256i, ch_mag, len*4); memcpy(ch_mag_i_256i, ch_mag_i, len*4); memcpy(rho_256i, rho, len*4); - + /* qam64_qam64_avx2((int32_t *)rxF_256i, (int32_t *)rxF_i_256i, (int32_t *)ch_mag_256i, @@ -8861,13 +8854,34 @@ int dlsch_64qam_64qam_llr(LTE_DL_FRAME_PARMS *frame_parms, (int16_t *)llr16, (int32_t *) rho_256i, len); - + */ + qam64_qam16_avx2((short *)rxF_256i, + (short *)rxF_i_256i, + (short *)ch_mag_256i, + (short *)ch_mag_i_256i, + (short *)llr16, + (short *)rho_256i, + len); + free16(rxF_256i, sizeof(rxF_256i)); free16(rxF_i_256i, sizeof(rxF_i_256i)); free16(ch_mag_256i, sizeof(ch_mag_256i)); free16(ch_mag_i_256i, sizeof(ch_mag_i_256i)); free16(rho_256i, sizeof(rho_256i)); -#endif + +//#else +/* + qam64_qam64((short *)rxF, + (short *)rxF_i, + (short *)ch_mag, + (short *)ch_mag_i, + (short *)llr16, + (short *)rho, + len); +*/ + +//#endif + llr16 += (6*len); *llr16p = (short *)llr16; return(0); diff --git a/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c new file mode 100644 index 0000000000000000000000000000000000000000..cda5ad0f55ee8afbe37bf4d3536cea17fb3ed575 --- /dev/null +++ b/openair1/PHY/LTE_TRANSPORT/dlsch_llr_computation_avx2.c @@ -0,0 +1,4034 @@ + /* + * Licensed to the OpenAirInterface (OAI) Software Alliance under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The OpenAirInterface Software Alliance licenses this file to You under + * the OAI Public License, Version 1.0 (the "License"); you may not use this file + * except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.openairinterface.org/?page_id=698 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------------------- + * For more information about the OpenAirInterface (OAI) Software Alliance: + * contact@openairinterface.org + */ + +/*! \file PHY/LTE_TRANSPORT/dlsch_llr_computation.c + * \brief Top-level routines for LLR computation of the PDSCH physical channel from 36-211, V8.6 2009-03 + * \author R. Knopp, F. Kaltenberger,A. Bhamri, S. Aubert, S. 
Wagner, X Jiang + * \date 2011 + * \version 0.1 + * \company Eurecom + * \email: knopp@eurecom.fr,florian.kaltenberger@eurecom.fr,ankit.bhamri@eurecom.fr,sebastien.aubert@eurecom.fr, sebastian.wagner@eurecom.fr + * \note + * \warning + */ + +#include "PHY/defs.h" +#include "PHY/TOOLS/defs.h" +#include "PHY/extern.h" +#include "defs.h" +#include "extern.h" +#include "PHY/sse_intrin.h" + +int16_t ones256[16] __attribute__ ((aligned(32))) = {0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff,0xffff}; + +static __m256i rho_rpi __attribute__ ((aligned(32))); +static __m256i rho_rmi __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_1_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_3_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_5_7 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_1 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_3 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_5 __attribute__ ((aligned(32))); +static __m256i rho_rpi_7_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_1_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_3_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_5_7 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_1 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_3 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_5 __attribute__ ((aligned(32))); +static __m256i rho_rmi_7_7 __attribute__ ((aligned(32))); + +static __m256i psi_r_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m7 __attribute__ 
((aligned(32))); +static __m256i psi_r_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m3_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m5 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_r_p7_p7 __attribute__ ((aligned(32))); + +static __m256i psi_i_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p1 __attribute__ 
((aligned(32))); +static __m256i psi_i_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m3_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m5 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_i_p7_p7 __attribute__ ((aligned(32))); + +static __m256i a_r_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m7_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m7 __attribute__ ((aligned(32))); +static 
__m256i a_r_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m7 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_r_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p1 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p5 __attribute__ ((aligned(32))); +static __m256i a_r_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p1_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p3_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_r_p7_m1 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p5 __attribute__ ((aligned(32))); +static __m256i a_r_p7_p7 __attribute__ ((aligned(32))); + +static __m256i a_i_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m7_p7 
__attribute__ ((aligned(32))); +static __m256i a_i_m5_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m7 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_i_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p1 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p5 __attribute__ ((aligned(32))); +static __m256i a_i_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p1_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p3_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_i_p7_m1 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p5 __attribute__ ((aligned(32))); +static __m256i a_i_p7_p7 __attribute__ ((aligned(32))); + +static __m256i psi_a_m7_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p3 
__attribute__ ((aligned(32))); +static __m256i psi_a_m7_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m7_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m5_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m3_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_m1_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p1_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p3_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p5_p7 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m7 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m5 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m3 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_m1 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p1 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p3 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p5 __attribute__ ((aligned(32))); +static __m256i psi_a_p7_p7 __attribute__ ((aligned(32))); + +static __m256i a_sq_m7_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_m5 __attribute__ 
((aligned(32))); +static __m256i a_sq_m7_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m7_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m5_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m3_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_m1_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p1_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p3_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p5 __attribute__ ((aligned(32))); +static __m256i a_sq_p5_p7 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m7 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m5 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m3 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_m1 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p1 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p3 __attribute__ ((aligned(32))); +static __m256i a_sq_p7_p5 __attribute__ ((aligned(32))); 
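+// Naming convention for the per-hypothesis registers in this block: the two
+// suffixes (m/p 1,3,5,7) give the real and imaginary amplitudes, in units of
+// 1/sqrt(42), of one of the 64 candidate values of the desired 64-QAM symbol;
+// each __m256i holds that quantity for 16 received samples in parallel.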
+static __m256i a_sq_p7_p7 __attribute__ ((aligned(32))); + +static __m256i bit_met_m7_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m7_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m5_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m3_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_m1_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p1_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p3_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m7 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p5_p7 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m7 __attribute__ ((aligned(32))); +static __m256i 
bit_met_p7_m5 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m3 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_m1 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p1 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p3 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p5 __attribute__ ((aligned(32))); +static __m256i bit_met_p7_p7 __attribute__ ((aligned(32))); + +static __m256i y0_p_1_1 __attribute__ ((aligned(32))); +static __m256i y0_p_1_3 __attribute__ ((aligned(32))); +static __m256i y0_p_1_5 __attribute__ ((aligned(32))); +static __m256i y0_p_1_7 __attribute__ ((aligned(32))); +static __m256i y0_p_3_1 __attribute__ ((aligned(32))); +static __m256i y0_p_3_3 __attribute__ ((aligned(32))); +static __m256i y0_p_3_5 __attribute__ ((aligned(32))); +static __m256i y0_p_3_7 __attribute__ ((aligned(32))); +static __m256i y0_p_5_1 __attribute__ ((aligned(32))); +static __m256i y0_p_5_3 __attribute__ ((aligned(32))); +static __m256i y0_p_5_5 __attribute__ ((aligned(32))); +static __m256i y0_p_5_7 __attribute__ ((aligned(32))); +static __m256i y0_p_7_1 __attribute__ ((aligned(32))); +static __m256i y0_p_7_3 __attribute__ ((aligned(32))); +static __m256i y0_p_7_5 __attribute__ ((aligned(32))); +static __m256i y0_p_7_7 __attribute__ ((aligned(32))); +static __m256i y0_m_1_1 __attribute__ ((aligned(32))); +static __m256i y0_m_1_3 __attribute__ ((aligned(32))); +static __m256i y0_m_1_5 __attribute__ ((aligned(32))); +static __m256i y0_m_1_7 __attribute__ ((aligned(32))); +static __m256i y0_m_3_1 __attribute__ ((aligned(32))); +static __m256i y0_m_3_3 __attribute__ ((aligned(32))); +static __m256i y0_m_3_5 __attribute__ ((aligned(32))); +static __m256i y0_m_3_7 __attribute__ ((aligned(32))); +static __m256i y0_m_5_1 __attribute__ ((aligned(32))); +static __m256i y0_m_5_3 __attribute__ ((aligned(32))); +static __m256i y0_m_5_5 __attribute__ ((aligned(32))); +static __m256i y0_m_5_7 __attribute__ ((aligned(32))); +static __m256i y0_m_7_1 __attribute__ ((aligned(32))); +static __m256i y0_m_7_3 __attribute__ ((aligned(32))); +static __m256i y0_m_7_5 __attribute__ ((aligned(32))); +static __m256i y0_m_7_7 __attribute__ ((aligned(32))); + +static __m256i xmm0 __attribute__ ((aligned(32))); +static __m256i xmm1 __attribute__ ((aligned(32))); +static __m256i xmm2 __attribute__ ((aligned(32))); +static __m256i xmm3 __attribute__ ((aligned(32))); +static __m256i xmm4 __attribute__ ((aligned(32))); +static __m256i xmm5 __attribute__ ((aligned(32))); +static __m256i xmm6 __attribute__ ((aligned(32))); +static __m256i xmm7 __attribute__ ((aligned(32))); +static __m256i xmm8 __attribute__ ((aligned(32))); + +static __m256i y0r __attribute__ ((aligned(32))); +static __m256i y0i __attribute__ ((aligned(32))); +static __m256i y1r __attribute__ ((aligned(32))); +static __m256i y1i __attribute__ ((aligned(32))); +static __m256i y2r __attribute__ ((aligned(32))); +static __m256i y2i __attribute__ ((aligned(32))); + +static __m256i logmax_num_re0 __attribute__ ((aligned(32))); +static __m256i logmax_den_re0 __attribute__ ((aligned(32))); + +static __m256i tmp_result __attribute__ ((aligned(32))); +static __m256i tmp_result2 __attribute__ ((aligned(32))); +static __m256i tmp_result3 __attribute__ ((aligned(32))); +static __m256i tmp_result4 __attribute__ ((aligned(32))); + +//============================================================================================== +// Auxiliary Makros + +// calculate interference magnitude +#define 
interference_abs_epi16(psi,int_ch_mag,int_mag,c1,c2) tmp_result = _mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result2 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result = _mm256_and_si256(tmp_result,c1); tmp_result2 = _mm256_and_si256(tmp_result2,c2); int_mag = _mm256_or_si256(tmp_result,tmp_result2); + +// calculate interference magnitude +// tmp_result = ones in shorts corr. to interval 2<=x<=4, tmp_result2 interval < 2, tmp_result3 interval 4<x<6 and tmp_result4 interval x>6 +#define interference_abs_64qam_epi16(psi,int_ch_mag,int_two_ch_mag,int_three_ch_mag,a,c1,c3,c5,c7) tmp_result = _mm256_cmpgt_epi16(int_two_ch_mag,psi); tmp_result3 = _mm256_xor_si256(tmp_result,(*(__m256i*)&ones256[0])); tmp_result2 = _mm256_cmpgt_epi16(int_ch_mag,psi); tmp_result = _mm256_xor_si256(tmp_result,tmp_result2); tmp_result4 = _mm256_cmpgt_epi16(psi,int_three_ch_mag); tmp_result3 = _mm256_xor_si256(tmp_result3,tmp_result4); tmp_result = _mm256_and_si256(tmp_result,c3); tmp_result2 = _mm256_and_si256(tmp_result2,c1); tmp_result3 = _mm256_and_si256(tmp_result3,c5); tmp_result4 = _mm256_and_si256(tmp_result4,c7); tmp_result = _mm256_or_si256(tmp_result,tmp_result2); tmp_result3 = _mm256_or_si256(tmp_result3,tmp_result4); a = _mm256_or_si256(tmp_result,tmp_result3); + +// calculates psi_a = psi_r*a_r + psi_i*a_i +#define prodsum_psi_a_epi16(psi_r,a_r,psi_i,a_i,psi_a) tmp_result = _mm256_mulhi_epi16(psi_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(psi_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); psi_a = _mm256_adds_epi16(tmp_result,tmp_result2); + +// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor +#define square_a_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm256_mulhi_epi16(a_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); a_sq = _mm256_adds_epi16(tmp_result,tmp_result2); + +// calculates a_sq = int_ch_mag*(a_r^2 + a_i^2)*scale_factor for 64-QAM +#define square_a_64qam_epi16(a_r,a_i,int_ch_mag,scale_factor,a_sq) tmp_result = _mm256_mulhi_epi16(a_r,a_r); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result = _mm256_mulhi_epi16(tmp_result,scale_factor); tmp_result = _mm256_slli_epi16(tmp_result,3); tmp_result = _mm256_mulhi_epi16(tmp_result,int_ch_mag); tmp_result = _mm256_slli_epi16(tmp_result,1); tmp_result2 = _mm256_mulhi_epi16(a_i,a_i); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,scale_factor); tmp_result2 = _mm256_slli_epi16(tmp_result2,3); tmp_result2 = _mm256_mulhi_epi16(tmp_result2,int_ch_mag); tmp_result2 = _mm256_slli_epi16(tmp_result2,1); a_sq = _mm256_adds_epi16(tmp_result,tmp_result2); + +void seperate_real_imag_parts(__m256i *out_re, + __m256i *out_im, + __m256i in0, + __m256i in1) +{ + __m256i tmp0; + __m256i tmp1; + + in0 = _mm256_shufflelo_epi16(in0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + in0 = _mm256_shufflehi_epi16(in0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + in0 = _mm256_shuffle_epi32(in0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + in1 = 
_mm256_shufflelo_epi16(in1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + in1 = _mm256_shufflehi_epi16(in1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + in1 = _mm256_shuffle_epi32(in1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + //in0 = [Re(0,1,2,3) Im(0,1,2,3) Re(4,5,6,7) Im(4,5,6,7)] + //in0 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)] + + tmp0 = _mm256_unpacklo_epi64(in0, in1); + //axmm2 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)] + tmp0 = _mm256_permute4x64_epi64(tmp0,0xd8); // Re(rho) + + tmp1 = _mm256_unpackhi_epi64(in0, in1); + //axmm3 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)] + tmp1 = _mm256_permute4x64_epi64(tmp1,0xd8); // Im(rho) + + *out_re = tmp0; + *out_im = tmp1; +} + +void qam64_qam16_avx2(short *stream0_in, + short *stream1_in, + short *ch_mag, + short *ch_mag_i, + short *stream0_out, + short *rho01, + int length + ) +{ + + /* + Author: S. Wagner + Date: 31-07-12 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + + __m256i *rho01_256i = (__m256i *)rho01; + __m256i *stream0_256i_in = (__m256i *)stream0_in; + __m256i *stream1_256i_in = (__m256i *)stream1_in; + __m256i *ch_mag_256i = (__m256i *)ch_mag; + __m256i *ch_mag_256i_i = (__m256i *)ch_mag_i; + + __m256i ONE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16) + __m256i THREE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16) + __m256i FIVE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15) + __m256i SEVEN_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(5/sqrt(42)*2^15) + __m256i FORTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m256i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15) + __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15) + __m256i NINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15) + __m256i THIRTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15) + __m256i FIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15) + __m256i ONE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15) + __m256i ONE_OVER_SQRT_10_Q15 = _mm256_broadcastw_epi16(_mm_set1_epi16(10362)); // round(1/sqrt(10)*2^15) + __m256i THREE_OVER_SQRT_10 = _mm256_broadcastw_epi16(_mm_set1_epi16(31086)); // round(3/sqrt(10)*2^15) + __m256i SQRT_10_OVER_FOUR = _mm256_broadcastw_epi16(_mm_set1_epi16(25905)); // round(sqrt(10)/4*2^15) + + + __m256i ch_mag_int; + __m256i ch_mag_des; + __m256i ch_mag_98_over_42_with_sigma2; + __m256i ch_mag_74_over_42_with_sigma2; + __m256i ch_mag_58_over_42_with_sigma2; + 
__m256i ch_mag_50_over_42_with_sigma2; + __m256i ch_mag_34_over_42_with_sigma2; + __m256i ch_mag_18_over_42_with_sigma2; + __m256i ch_mag_26_over_42_with_sigma2; + __m256i ch_mag_10_over_42_with_sigma2; + __m256i ch_mag_2_over_42_with_sigma2; + __m256i y0r_one_over_sqrt_21; + __m256i y0r_three_over_sqrt_21; + __m256i y0r_five_over_sqrt_21; + __m256i y0r_seven_over_sqrt_21; + __m256i y0i_one_over_sqrt_21; + __m256i y0i_three_over_sqrt_21; + __m256i y0i_five_over_sqrt_21; + __m256i y0i_seven_over_sqrt_21; + +#elif defined(__arm__) + +#endif + int i,j; + uint32_t len256 = (length)>>3; + + for (i=0; i<len256; i+=2) { + +#if defined(__x86_64__) || defined(__i386__) + // Get rho + /* + xmm0 = rho01_128i[i]; + xmm1 = rho01_128i[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + xmm2 = _mm_unpacklo_epi64(xmm0,xmm1); // Re(rho) + xmm3 = _mm_unpackhi_epi64(xmm0,xmm1); // Im(rho) + */ + seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]); + + rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1); + rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm256_slli_epi16(xmm7, 1); + xmm8 = _mm256_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6); + 
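+    // Fixed-point note: _mm256_mulhi_epi16() keeps only the upper 16 bits of
+    // each 32-bit product (an implicit >>16). ONE/THREE_OVER_SQRT_42 are stored
+    // scaled by 2^16, FIVE_OVER_SQRT_42 by 2^15 and SEVEN_OVER_SQRT_42 by 2^14,
+    // which is why the "5" and "7" terms are shifted left by 1 and 2 after the
+    // multiply, bringing all rho projections back to a common scale.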
rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + /* + xmm0 = stream1_128i_in[i]; + xmm1 = stream1_128i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y1r = _mm256_unpacklo_epi64(xmm0,xmm1); //[y1r(1),y1r(2),y1r(3),y1r(4)] + y1i = _mm256_unpackhi_epi64(xmm0,xmm1); //[y1i(1),y1i(2),y1i(3),y1i(4)] + */ + + seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]); + + // Psi_r calculation from rho_rpi or rho_rmi + xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16 + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r); + psi_r_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r); + psi_r_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 
= _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r); + psi_r_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r); + psi_r_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm256_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i); + psi_i_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i); + psi_i_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm256_abs_epi16(xmm2); + +/* + // Rearrange desired MF output + xmm0 = stream0_128i_in[i]; + xmm1 = stream0_128i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm256_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm256_unpackhi_epi64(xmm0,xmm1); +*/ + seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], stream0_256i_in[i+1]); + + /* + // Rearrange desired channel magnitudes + xmm2 = ch_mag_128i[i]; // = [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2)]*(2/sqrt(10)) + xmm3 = ch_mag_128i[i+1]; // = [|h|^2(3),|h|^2(3),|h|^2(4),|h|^2(4)]*(2/sqrt(10)) + xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = _mm256_unpacklo_epi64(xmm2,xmm3); + */ + + seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]); + + // Rearrange interfering channel magnitudes + /* + xmm2 = ch_mag_128i_i[i]; + xmm3 = 
ch_mag_128i_i[i+1]; + xmm2 = _mm256_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm256_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm256_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_int = _mm256_unpacklo_epi64(xmm2,xmm3); + */ + + seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]); + + y0r_one_over_sqrt_21 = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 + + y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, 
y0i_five_over_sqrt_21); + y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + interference_abs_epi16(psi_r_p7_p7, ch_mag_int, a_r_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p5, ch_mag_int, a_r_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p3, ch_mag_int, a_r_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_p1, ch_mag_int, a_r_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m1, ch_mag_int, a_r_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m3, ch_mag_int, a_r_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m5, ch_mag_int, a_r_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p7_m7, ch_mag_int, a_r_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p7, ch_mag_int, a_r_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p5, ch_mag_int, a_r_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p3, ch_mag_int, a_r_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_p1, ch_mag_int, a_r_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m1, ch_mag_int, a_r_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m3, ch_mag_int, a_r_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m5, ch_mag_int, a_r_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p5_m7, ch_mag_int, a_r_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p7, ch_mag_int, a_r_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p5, ch_mag_int, a_r_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p3, ch_mag_int, a_r_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_p1, ch_mag_int, a_r_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m1, ch_mag_int, a_r_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m3, ch_mag_int, a_r_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m5, ch_mag_int, a_r_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p3_m7, ch_mag_int, a_r_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p7, ch_mag_int, a_r_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p5, ch_mag_int, a_r_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p3, ch_mag_int, a_r_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_p1, ch_mag_int, a_r_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m1, ch_mag_int, a_r_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m3, ch_mag_int, a_r_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_r_p1_m5, ch_mag_int, a_r_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_p1_m7, ch_mag_int, a_r_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p7, ch_mag_int, a_r_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p5, ch_mag_int, a_r_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p3, ch_mag_int, a_r_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_p1, ch_mag_int, a_r_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m1, ch_mag_int, a_r_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m3, ch_mag_int, a_r_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m5, ch_mag_int, a_r_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m1_m7, ch_mag_int, a_r_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p7, ch_mag_int, a_r_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p5, ch_mag_int, a_r_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p3, ch_mag_int, a_r_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_p1, ch_mag_int, a_r_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m1, ch_mag_int, a_r_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m3, ch_mag_int, a_r_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m5, ch_mag_int, a_r_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m3_m7, ch_mag_int, a_r_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p7, ch_mag_int, a_r_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p5, ch_mag_int, a_r_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p3, ch_mag_int, a_r_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_p1, ch_mag_int, a_r_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m1, ch_mag_int, a_r_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m3, ch_mag_int, a_r_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m5, ch_mag_int, a_r_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m5_m7, ch_mag_int, a_r_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p7, ch_mag_int, a_r_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p5, ch_mag_int, a_r_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p3, ch_mag_int, a_r_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_p1, ch_mag_int, a_r_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m1, ch_mag_int, a_r_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m3, ch_mag_int, a_r_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m5, ch_mag_int, a_r_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_r_m7_m7, ch_mag_int, a_r_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + 
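+    /* A scalar sketch (illustrative assumption only; the interference_abs_epi16
+       macro body is not shown in this hunk): each call above appears to make a
+       hard decision on the 16QAM interferer amplitude by comparing the
+       matched-filter magnitude psi against the threshold carried in ch_mag_int
+       and keeping the nearer constellation amplitude, i.e.
+
+         int16_t interference_abs_scalar(int16_t psi, int16_t thresh,
+                                         int16_t c1, int16_t c3)
+         {
+           // c1 = 1/sqrt(10) in Q15, c3 = 3/sqrt(10) in Q15
+           return (psi < thresh) ? c1 : c3;
+         }
+
+       The AVX2 macro presumably performs the same select over 16 lanes with a
+       compare/and/or sequence. The same decision is repeated for the imaginary
+       components below. */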
interference_abs_epi16(psi_i_p7_p7, ch_mag_int, a_i_p7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p5, ch_mag_int, a_i_p7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p3, ch_mag_int, a_i_p7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_p1, ch_mag_int, a_i_p7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m1, ch_mag_int, a_i_p7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m3, ch_mag_int, a_i_p7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m5, ch_mag_int, a_i_p7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p7_m7, ch_mag_int, a_i_p7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p7, ch_mag_int, a_i_p5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p5, ch_mag_int, a_i_p5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p3, ch_mag_int, a_i_p5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_p1, ch_mag_int, a_i_p5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m1, ch_mag_int, a_i_p5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m3, ch_mag_int, a_i_p5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m5, ch_mag_int, a_i_p5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p5_m7, ch_mag_int, a_i_p5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p7, ch_mag_int, a_i_p3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p5, ch_mag_int, a_i_p3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p3, ch_mag_int, a_i_p3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_p1, ch_mag_int, a_i_p3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m1, ch_mag_int, a_i_p3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m3, ch_mag_int, a_i_p3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m5, ch_mag_int, a_i_p3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p3_m7, ch_mag_int, a_i_p3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p7, ch_mag_int, a_i_p1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p5, ch_mag_int, a_i_p1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p3, ch_mag_int, a_i_p1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_p1, ch_mag_int, a_i_p1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m1, ch_mag_int, a_i_p1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m3, ch_mag_int, a_i_p1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m5, ch_mag_int, a_i_p1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_p1_m7, ch_mag_int, a_i_p1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p7, ch_mag_int, a_i_m1_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p5, ch_mag_int, a_i_m1_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + 
interference_abs_epi16(psi_i_m1_p3, ch_mag_int, a_i_m1_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_p1, ch_mag_int, a_i_m1_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m1, ch_mag_int, a_i_m1_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m3, ch_mag_int, a_i_m1_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m5, ch_mag_int, a_i_m1_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m1_m7, ch_mag_int, a_i_m1_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p7, ch_mag_int, a_i_m3_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p5, ch_mag_int, a_i_m3_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p3, ch_mag_int, a_i_m3_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_p1, ch_mag_int, a_i_m3_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m1, ch_mag_int, a_i_m3_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m3, ch_mag_int, a_i_m3_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m5, ch_mag_int, a_i_m3_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m3_m7, ch_mag_int, a_i_m3_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p7, ch_mag_int, a_i_m5_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p5, ch_mag_int, a_i_m5_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p3, ch_mag_int, a_i_m5_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_p1, ch_mag_int, a_i_m5_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m1, ch_mag_int, a_i_m5_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m3, ch_mag_int, a_i_m5_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m5, ch_mag_int, a_i_m5_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m5_m7, ch_mag_int, a_i_m5_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p7, ch_mag_int, a_i_m7_p7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p5, ch_mag_int, a_i_m7_p5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p3, ch_mag_int, a_i_m7_p3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_p1, ch_mag_int, a_i_m7_p1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m1, ch_mag_int, a_i_m7_m1, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m3, ch_mag_int, a_i_m7_m3, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m5, ch_mag_int, a_i_m7_m5, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + interference_abs_epi16(psi_i_m7_m7, ch_mag_int, a_i_m7_m7, ONE_OVER_SQRT_10_Q15, THREE_OVER_SQRT_10); + + // Calculation of a group of two terms in the bit metric involving product of psi and interference + prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); + prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); + prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); + prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, 
psi_a_p7_p1); + prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); + prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); + prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); + prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); + prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); + prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); + prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); + prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); + prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); + prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); + prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); + prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); + prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); + prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); + prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); + prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); + prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); + prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); + prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); + prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); + prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); + prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); + prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); + prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); + prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); + prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); + prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); + 
prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); + prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); + prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); + prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); + prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); + prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); + prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); + prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); + prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); + prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); + prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); + prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); + prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); + prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); + prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); + prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); + prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); + prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); + + // Calculation of a group of two terms in the bit metric involving squares of interference + square_a_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p7); + square_a_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p5); + square_a_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p3); + square_a_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_p1); + square_a_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m1); + square_a_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m3); + square_a_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m5); + square_a_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p7_m7); + square_a_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p7); + square_a_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p5); + square_a_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p3); + square_a_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_p1); + square_a_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m1); + square_a_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m3); + square_a_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m5); + square_a_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p5_m7); + square_a_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p7); + square_a_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p5); + square_a_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p3); + square_a_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_p1); + square_a_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m1); + square_a_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m3); + square_a_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m5); + square_a_epi16(a_r_p3_m7, a_i_p3_m7, 
ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p3_m7); + square_a_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p7); + square_a_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p5); + square_a_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p3); + square_a_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_p1); + square_a_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m1); + square_a_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m3); + square_a_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m5); + square_a_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_p1_m7); + square_a_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p7); + square_a_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p5); + square_a_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p3); + square_a_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_p1); + square_a_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m1); + square_a_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m3); + square_a_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m5); + square_a_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m1_m7); + square_a_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p7); + square_a_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p5); + square_a_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p3); + square_a_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_p1); + square_a_epi16(a_r_m3_m1, a_i_m3_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m1); + square_a_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m3); + square_a_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m5); + square_a_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m3_m7); + square_a_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p7); + square_a_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p5); + square_a_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p3); + square_a_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_p1); + square_a_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m1); + square_a_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m3); + square_a_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m5); + square_a_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m5_m7); + square_a_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p7); + square_a_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p5); + square_a_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p3); + square_a_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_p1); + square_a_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m1); + square_a_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m3); + square_a_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m5); + square_a_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_10_OVER_FOUR, a_sq_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = 
_mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, x=5 + ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 + ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics + xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7); + bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5); + bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3); + bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1); + bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1); + bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3); + bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5); + bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7); + bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7); + bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5); + bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = 
_mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3); + bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1); + bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1); + bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3); + bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5); + bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7); + bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7); + bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5); + bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3); + bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1); + bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1); + bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3); + bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5); + bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7); + bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7); + bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5); + bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3); + bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1); + bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1); + bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3); + bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5); + bit_met_p1_m5 = _mm256_subs_epi16(xmm1, 
ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7); + bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7); + bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5); + bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3); + bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1); + bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1); + bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3); + bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5); + bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7); + bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_7); + bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5); + bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3); + bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1); + bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1); + bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3); + bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5); + bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7); + bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7); + bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5); + bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3); + bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1); + 
bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1); + bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3); + bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5); + bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7); + bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7); + bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5); + bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3); + bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1); + bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1); + bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3); + bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5); + bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7); + bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1); + xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3); 
+ xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3); + xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = 
_mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = 
_mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 4th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, 
xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, 
bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = 
_mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 48*i; + stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = ((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short *)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short *)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; + // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short *)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = ((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; + + // RE 9 + stream0_out[j + 48] = ((short *)&y0r)[8]; + stream0_out[j + 49] = ((short *)&y1r)[8]; + stream0_out[j + 50] = ((short *)&y2r)[8]; + stream0_out[j + 51] = ((short *)&y0i)[8]; + stream0_out[j + 52] = ((short 
*)&y1i)[8]; + stream0_out[j + 53] = ((short *)&y2i)[8]; + // RE 10 + stream0_out[j + 54] = ((short *)&y0r)[9]; + stream0_out[j + 55] = ((short *)&y1r)[9]; + stream0_out[j + 56] = ((short *)&y2r)[9]; + stream0_out[j + 57] = ((short *)&y0i)[9]; + stream0_out[j + 58] = ((short *)&y1i)[9]; + stream0_out[j + 59] = ((short *)&y2i)[9]; + // RE 11 + stream0_out[j + 60] = ((short *)&y0r)[10]; + stream0_out[j + 61] = ((short *)&y1r)[10]; + stream0_out[j + 62] = ((short *)&y2r)[10]; + stream0_out[j + 63] = ((short *)&y0i)[10]; + stream0_out[j + 64] = ((short *)&y1i)[10]; + stream0_out[j + 65] = ((short *)&y2i)[10]; + // RE 12 + stream0_out[j + 66] = ((short *)&y0r)[11]; + stream0_out[j + 67] = ((short *)&y1r)[11]; + stream0_out[j + 68] = ((short *)&y2r)[11]; + stream0_out[j + 69] = ((short *)&y0i)[11]; + stream0_out[j + 70] = ((short *)&y1i)[11]; + stream0_out[j + 71] = ((short *)&y2i)[11]; + // RE 13 + stream0_out[j + 72] = ((short *)&y0r)[12]; + stream0_out[j + 73] = ((short *)&y1r)[12]; + stream0_out[j + 74] = ((short *)&y2r)[12]; + stream0_out[j + 75] = ((short *)&y0i)[12]; + stream0_out[j + 76] = ((short *)&y1i)[12]; + stream0_out[j + 77] = ((short *)&y2i)[12]; + // RE 14 + stream0_out[j + 78] = ((short *)&y0r)[13]; + stream0_out[j + 79] = ((short *)&y1r)[13]; + stream0_out[j + 80] = ((short *)&y2r)[13]; + stream0_out[j + 81] = ((short *)&y0i)[13]; + stream0_out[j + 82] = ((short *)&y1i)[13]; + stream0_out[j + 83] = ((short *)&y2i)[13]; + // RE 15 + stream0_out[j + 84] = ((short *)&y0r)[14]; + stream0_out[j + 85] = ((short *)&y1r)[14]; + stream0_out[j + 86] = ((short *)&y2r)[14]; + stream0_out[j + 87] = ((short *)&y0i)[14]; + stream0_out[j + 88] = ((short *)&y1i)[14]; + stream0_out[j + 89] = ((short *)&y2i)[14]; + // RE 16 + stream0_out[j + 90] = ((short *)&y0r)[15]; + stream0_out[j + 91] = ((short *)&y1r)[15]; + stream0_out[j + 92] = ((short *)&y2r)[15]; + stream0_out[j + 93] = ((short *)&y0i)[15]; + stream0_out[j + 94] = ((short *)&y1i)[15]; + stream0_out[j + 95] = ((short *)&y2i)[15]; + +#elif defined(__arm__) + +#endif + } + +#if defined(__x86_64__) || defined(__i386__) + _mm_empty(); + _m_empty(); +#endif + +} + +void qam64_qam64_avx2(int32_t *stream0_in, + int32_t *stream1_in, + int32_t *ch_mag, + int32_t *ch_mag_i, + int16_t *stream0_out, + int32_t *rho01, + int length + ) +{ + + /* + Author: S. Wagner + Date: 28-02-17 + + Input: + stream0_in: MF filter for 1st stream, i.e., y0=h0'*y + stream1_in: MF filter for 2nd stream, i.e., y1=h1'*y + ch_mag: 4*h0/sqrt(42), [Re0 Im0 Re1 Im1] s.t. Im0=Re0, Im1=Re1, etc + ch_mag_i: 4*h1/sqrt(42), [Re0 Im0 Re1 Im1] s.t. 
Im0=Re0, Im1=Re1, etc + rho01: Channel cross correlation, i.e., h1'*h0 + + Output: + stream0_out: output LLRs for 1st stream + */ + +#if defined(__x86_64__) || defined(__i386__) + + __m256i *rho01_256i = (__m256i *)rho01; + __m256i *stream0_256i_in = (__m256i *)stream0_in; + __m256i *stream1_256i_in = (__m256i *)stream1_in; + __m256i *ch_mag_256i = (__m256i *)ch_mag; + __m256i *ch_mag_256i_i = (__m256i *)ch_mag_i; + + __m256i ONE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10112)); // round(1/sqrt(42)*2^16) + __m256i THREE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30337)); // round(3/sqrt(42)*2^16) + __m256i FIVE_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25281)); // round(5/sqrt(42)*2^15) + __m256i SEVEN_OVER_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17697)); // round(7/sqrt(42)*2^14) Q2.14 + __m256i ONE_OVER_SQRT_2 = _mm256_broadcastw_epi16(_mm_set1_epi16(23170)); // round(1/sqrt(2)*2^15) + __m256i ONE_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(3575)); // round(1/sqrt(2*42)*2^15) + __m256i THREE_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(10726)); // round(3/sqrt(2*42)*2^15) + __m256i FIVE_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(17876)); // round(5/sqrt(2*42)*2^15) + __m256i SEVEN_OVER_SQRT_2_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(25027)); // round(7/sqrt(2*42)*2^15) + __m256i FORTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(30969)); // round(49/(4*sqrt(42))*2^14), Q2.14 + __m256i THIRTYSEVEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(23385)); // round(37/(4*sqrt(42))*2^14), Q2.14 + __m256i TWENTYFIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(31601)); // round(25/(4*sqrt(42))*2^15) + __m256i TWENTYNINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(18329)); // round(29/(4*sqrt(42))*2^15), Q2.14 + __m256i SEVENTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(21489)); // round(17/(4*sqrt(42))*2^15) + __m256i NINE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(11376)); // round(9/(4*sqrt(42))*2^15) + __m256i THIRTEEN_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(16433)); // round(13/(4*sqrt(42))*2^15) + __m256i FIVE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(6320)); // round(5/(4*sqrt(42))*2^15) + __m256i ONE_OVER_FOUR_SQRT_42 = _mm256_broadcastw_epi16(_mm_set1_epi16(1264)); // round(1/(4*sqrt(42))*2^15) + __m256i SQRT_42_OVER_FOUR = _mm256_broadcastw_epi16(_mm_set1_epi16(13272)); // round(sqrt(42)/4*2^13), Q3.12 + + __m256i ch_mag_des; + __m256i ch_mag_int; + __m256i ch_mag_98_over_42_with_sigma2; + __m256i ch_mag_74_over_42_with_sigma2; + __m256i ch_mag_58_over_42_with_sigma2; + __m256i ch_mag_50_over_42_with_sigma2; + __m256i ch_mag_34_over_42_with_sigma2; + __m256i ch_mag_18_over_42_with_sigma2; + __m256i ch_mag_26_over_42_with_sigma2; + __m256i ch_mag_10_over_42_with_sigma2; + __m256i ch_mag_2_over_42_with_sigma2; + __m256i y0r_one_over_sqrt_21; + __m256i y0r_three_over_sqrt_21; + __m256i y0r_five_over_sqrt_21; + __m256i y0r_seven_over_sqrt_21; + __m256i y0i_one_over_sqrt_21; + __m256i y0i_three_over_sqrt_21; + __m256i y0i_five_over_sqrt_21; + __m256i y0i_seven_over_sqrt_21; + __m256i ch_mag_int_with_sigma2; + __m256i two_ch_mag_int_with_sigma2; + __m256i three_ch_mag_int_with_sigma2; +#elif defined(__arm__) + +#endif + + int i,j; + uint32_t len256 = (length)>>3; + + for (i=0; i<len256; i+=2) { + +#if defined(__x86_64__) || defined(__i386__) + + // Get rho + /* + xmm0 = 
rho01_256i[i]; + xmm1 = rho01_256i[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + //xmm0 = [Re(0,1,2,3) Im(0,1,2,3) Re(4,5,6,7) Im(4,5,6,7)] + //xmm0 = [Re(8,9,10,11) Im(8,9,10,11) Re(12,13,14,15) Im(12,13,14,15)] + + xmm2 = _mm256_unpacklo_epi64(xmm0, xmm1); + //xmm2 = [Re(0,1,2,3) Re(8,9,10,11) Re(4,5,6,7) Re(12,13,14,15)] + xmm2 = _mm256_permute4x64_epi64(xmm2,0xd8); // Re(rho) + + xmm3 = _mm256_unpackhi_epi64(xmm0, xmm1); + //xmm3 = [Im(0,1,2,3) Im(8,9,10,11) Im(4,5,6,7) Im(12,13,14,15)] + xmm3 = _mm256_permute4x64_epi64(xmm3,0xd8); // Im(rho) + */ + + seperate_real_imag_parts(&xmm2, &xmm3, rho01_256i[i], rho01_256i[i+1]); + + rho_rpi = _mm256_adds_epi16(xmm2,xmm3); // rho = Re(rho) + Im(rho) + rho_rmi = _mm256_subs_epi16(xmm2,xmm3); // rho* = Re(rho) - Im(rho) + + // Compute the different rhos + rho_rpi_1_1 = _mm256_mulhi_epi16(rho_rpi, ONE_OVER_SQRT_42); + rho_rmi_1_1 = _mm256_mulhi_epi16(rho_rmi, ONE_OVER_SQRT_42); + rho_rpi_3_3 = _mm256_mulhi_epi16(rho_rpi, THREE_OVER_SQRT_42); + rho_rmi_3_3 = _mm256_mulhi_epi16(rho_rmi, THREE_OVER_SQRT_42); + rho_rpi_5_5 = _mm256_mulhi_epi16(rho_rpi, FIVE_OVER_SQRT_42); + rho_rmi_5_5 = _mm256_mulhi_epi16(rho_rmi, FIVE_OVER_SQRT_42); + rho_rpi_7_7 = _mm256_mulhi_epi16(rho_rpi, SEVEN_OVER_SQRT_42); + rho_rmi_7_7 = _mm256_mulhi_epi16(rho_rmi, SEVEN_OVER_SQRT_42); + + rho_rpi_5_5 = _mm256_slli_epi16(rho_rpi_5_5, 1); + rho_rmi_5_5 = _mm256_slli_epi16(rho_rmi_5_5, 1); + rho_rpi_7_7 = _mm256_slli_epi16(rho_rpi_7_7, 2); + rho_rmi_7_7 = _mm256_slli_epi16(rho_rmi_7_7, 2); + + xmm4 = _mm256_mulhi_epi16(xmm2, ONE_OVER_SQRT_42); + xmm5 = _mm256_mulhi_epi16(xmm3, ONE_OVER_SQRT_42); + xmm6 = _mm256_mulhi_epi16(xmm3, THREE_OVER_SQRT_42); + xmm7 = _mm256_mulhi_epi16(xmm3, FIVE_OVER_SQRT_42); + xmm8 = _mm256_mulhi_epi16(xmm3, SEVEN_OVER_SQRT_42); + xmm7 = _mm256_slli_epi16(xmm7, 1); + xmm8 = _mm256_slli_epi16(xmm8, 2); + + rho_rpi_1_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_1_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_1_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_1_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_1_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_1_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, THREE_OVER_SQRT_42); + rho_rpi_3_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_3_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_3_5 = _mm256_adds_epi16(xmm4, xmm7); + rho_rmi_3_5 = _mm256_subs_epi16(xmm4, xmm7); + rho_rpi_3_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_3_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, FIVE_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 1); + rho_rpi_5_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_5_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_5_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_5_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_5_7 = _mm256_adds_epi16(xmm4, xmm8); + rho_rmi_5_7 = _mm256_subs_epi16(xmm4, xmm8); + + xmm4 = _mm256_mulhi_epi16(xmm2, SEVEN_OVER_SQRT_42); + xmm4 = _mm256_slli_epi16(xmm4, 2); + rho_rpi_7_1 = _mm256_adds_epi16(xmm4, xmm5); + rho_rmi_7_1 = _mm256_subs_epi16(xmm4, xmm5); + rho_rpi_7_3 = _mm256_adds_epi16(xmm4, xmm6); + rho_rmi_7_3 = _mm256_subs_epi16(xmm4, xmm6); + rho_rpi_7_5 = 
_mm256_adds_epi16(xmm4, xmm7); + rho_rmi_7_5 = _mm256_subs_epi16(xmm4, xmm7); + + // Rearrange interfering MF output + /* + xmm0 = stream1_256i_in[i]; + xmm1 = stream1_256i_in[i+1]; + xmm0 = _mm256_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm256_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + xmm1 = _mm256_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm256_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + + y1r = _mm256_unpacklo_epi64(xmm0, xmm1); + y1r = _mm256_permute4x64_epi64(y1r,0xd8); // Re(y1) + + y1i = _mm256_unpackhi_epi64(xmm0, xmm1); + y1i = _mm256_permute4x64_epi64(y1i,0xd8); // Im(y1) + */ + + seperate_real_imag_parts(&y1r, &y1i, stream1_256i_in[i], stream1_256i_in[i+1]); + + // Psi_r calculation from rho_rpi or rho_rmi + xmm0 = _mm256_broadcastw_epi16(_mm_set1_epi16(0));// ZERO for abs_pi16 + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1r); + + psi_r_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1r); + psi_r_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1r); + psi_r_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1r); + psi_r_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1r); + psi_r_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1r); + psi_r_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1r); + psi_r_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1r); + psi_r_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1r); + psi_r_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1r); + psi_r_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1r); + psi_r_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1r); + psi_r_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1r); + psi_r_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1r); + psi_r_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1r); + psi_r_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1r); + psi_r_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1r); + psi_r_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1r); + psi_r_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1r); + psi_r_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1r); + psi_r_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1r); + psi_r_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1r); + psi_r_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1r); + psi_r_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1r); + psi_r_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1r); + psi_r_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1r); + psi_r_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1r); + psi_r_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1r); + psi_r_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1r); + psi_r_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1r); + 
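/* Note on the psi_r_* / psi_i_* metrics built above and below (a short scalar
   sketch, assuming only what can be read off the code): for an interfering-symbol
   hypothesis (a + jb)/sqrt(42) with a,b in {+-1,+-3,+-5,+-7}, rho_rpi_a_b holds
   (a*Re(rho) + b*Im(rho))/sqrt(42) and rho_rmi_a_b holds
   (a*Re(rho) - b*Im(rho))/sqrt(42), so each metric reduces to
   psi_r(a,b) = |(a*Re(rho) + b*Im(rho))/sqrt(42) - Re(y1)| and
   psi_i(a,b) = |(b*Re(rho) - a*Im(rho))/sqrt(42) - Im(y1)|; the negative-a
   hypotheses use _mm256_adds_epi16 instead of _mm256_subs_epi16 because
   |x - y| = |-x + y|.  Scalar per-lane equivalent (illustrative only;
   psi_metric is a hypothetical helper, not part of this patch, and saturation
   is omitted):

     int16_t psi_metric(int32_t a, int32_t b, int16_t rho_re, int16_t rho_im,
                        int16_t y1)
     {
       // 10112 = round(1/sqrt(42)*2^16), same constant as ONE_OVER_SQRT_42
       int32_t proj = ((a * rho_re + b * rho_im) * 10112) >> 16;
       int32_t d = proj - y1;
       return (int16_t)(d < 0 ? -d : d);   // |projection - interfering MF output|
     }
*/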
psi_r_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1r); + psi_r_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1r); + psi_r_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1r); + psi_r_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1r); + psi_r_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1r); + psi_r_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1r); + psi_r_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1r); + psi_r_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1r); + psi_r_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1r); + psi_r_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1r); + psi_r_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1r); + psi_r_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1r); + psi_r_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1r); + psi_r_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1r); + psi_r_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1r); + psi_r_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1r); + psi_r_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1r); + psi_r_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1r); + psi_r_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1r); + psi_r_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1r); + psi_r_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1r); + psi_r_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1r); + psi_r_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1r); + psi_r_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1r); + psi_r_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1r); + psi_r_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_7, y1r); + psi_r_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1r); + psi_r_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1r); + psi_r_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_3, y1r); + psi_r_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1r); + psi_r_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1r); + psi_r_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1r); + psi_r_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1r); + psi_r_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1r); + psi_r_m7_m7 = _mm256_abs_epi16(xmm2); + + // Psi_i calculation from rho_rpi or rho_rmi + xmm2 = _mm256_subs_epi16(rho_rmi_7_7, y1i); + psi_i_p7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_7, y1i); + psi_i_p7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_7, y1i); + psi_i_p7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_7, y1i); + psi_i_p7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_7, y1i); + psi_i_p7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_7, y1i); + psi_i_p7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_adds_epi16(rho_rpi_5_7, y1i); + psi_i_p7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_7, y1i); + psi_i_p7_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_5, y1i); + psi_i_p5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_5, y1i); + psi_i_p5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_5, y1i); + psi_i_p5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_5, y1i); + psi_i_p5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_5, y1i); + psi_i_p5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_5, y1i); + psi_i_p5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_5, y1i); + psi_i_p5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_5, y1i); + psi_i_p5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_3, y1i); + psi_i_p3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_3, y1i); + psi_i_p3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_3, y1i); + psi_i_p3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_3, y1i); + psi_i_p3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_3, y1i); + psi_i_p3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_3, y1i); + psi_i_p3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_3, y1i); + psi_i_p3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_3, y1i); + psi_i_p3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_7_1, y1i); + psi_i_p1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_5_1, y1i); + psi_i_p1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_3_1, y1i); + psi_i_p1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rmi_1_1, y1i); + psi_i_p1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_1_1, y1i); + psi_i_p1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_3_1, y1i); + psi_i_p1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_5_1, y1i); + psi_i_p1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rpi_7_1, y1i); + psi_i_p1_m7 = _mm256_abs_epi16(xmm2); + + xmm2 = _mm256_subs_epi16(rho_rpi_7_1, y1i); + psi_i_m1_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_1, y1i); + psi_i_m1_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_1, y1i); + psi_i_m1_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_1, y1i); + psi_i_m1_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_1, y1i); + psi_i_m1_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_1, y1i); + psi_i_m1_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_1, y1i); + psi_i_m1_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_1, y1i); + psi_i_m1_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_3, y1i); + psi_i_m3_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_3, y1i); + psi_i_m3_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_3, y1i); + psi_i_m3_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_3, y1i); + psi_i_m3_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_3, y1i); + psi_i_m3_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_3, y1i); + psi_i_m3_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_3, y1i); + psi_i_m3_m5 = _mm256_abs_epi16(xmm2); + xmm2 = 
_mm256_adds_epi16(rho_rmi_7_3, y1i); + psi_i_m3_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_5, y1i); + psi_i_m5_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_5, y1i); + psi_i_m5_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_5, y1i); + psi_i_m5_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_5, y1i); + psi_i_m5_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_5, y1i); + psi_i_m5_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_5, y1i); + psi_i_m5_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_5, y1i); + psi_i_m5_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_5, y1i); + psi_i_m5_m7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_7_7, y1i); + psi_i_m7_p7 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_5_7, y1i); + psi_i_m7_p5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_3_7, y1i); + psi_i_m7_p3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_subs_epi16(rho_rpi_1_7, y1i); + psi_i_m7_p1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_1_7, y1i); + psi_i_m7_m1 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_3_7, y1i); + psi_i_m7_m3 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_5_7, y1i); + psi_i_m7_m5 = _mm256_abs_epi16(xmm2); + xmm2 = _mm256_adds_epi16(rho_rmi_7_7, y1i); + psi_i_m7_m7 = _mm256_abs_epi16(xmm2); + + /* + // Rearrange desired MF output + xmm0 = stream0_256i_in[i]; + xmm1 = stream0_256i_in[i+1]; + xmm0 = _mm_shufflelo_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shufflehi_epi16(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm0 = _mm_shuffle_epi32(xmm0,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflelo_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shufflehi_epi16(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm1 = _mm_shuffle_epi32(xmm1,0xd8); //_MM_SHUFFLE(0,2,1,3)); + //xmm0 = [Re(0,1) Re(2,3) Im(0,1) Im(2,3)] + //xmm1 = [Re(4,5) Re(6,7) Im(4,5) Im(6,7)] + y0r = _mm_unpacklo_epi64(xmm0,xmm1); // = [y0r(1),y0r(2),y0r(3),y0r(4)] + y0i = _mm_unpackhi_epi64(xmm0,xmm1); + */ + seperate_real_imag_parts(&y0r, &y0i, stream0_256i_in[i], stream0_256i_in[i+1]); + + // Rearrange desired channel magnitudes + // [|h|^2(1),|h|^2(1),|h|^2(2),|h|^2(2),...,,|h|^2(7),|h|^2(7)]*(2/sqrt(10)) + /* + xmm2 = ch_mag_256i[i]; + xmm3 = ch_mag_256i[i+1]; + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_des = _mm_unpacklo_epi64(xmm2,xmm3); + */ + // xmm2 is dummy variable that contains the same values as ch_mag_des + seperate_real_imag_parts(&ch_mag_des, &xmm2, ch_mag_256i[i], ch_mag_256i[i+1]); + + + // Rearrange interfering channel magnitudes + /* + xmm2 = ch_mag_256i_i[i]; + xmm3 = ch_mag_256i_i[i+1]; + xmm2 = _mm_shufflelo_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shufflehi_epi16(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm2 = _mm_shuffle_epi32(xmm2,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflelo_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shufflehi_epi16(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + xmm3 = _mm_shuffle_epi32(xmm3,0xd8); //_MM_SHUFFLE(0,2,1,3)); + ch_mag_int = 
_mm_unpacklo_epi64(xmm2,xmm3); + */ + seperate_real_imag_parts(&ch_mag_int, &xmm2, ch_mag_256i_i[i], ch_mag_256i_i[i+1]); + + y0r_one_over_sqrt_21 = _mm256_mulhi_epi16(y0r, ONE_OVER_SQRT_42); + y0r_three_over_sqrt_21 = _mm256_mulhi_epi16(y0r, THREE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_mulhi_epi16(y0r, FIVE_OVER_SQRT_42); + y0r_five_over_sqrt_21 = _mm256_slli_epi16(y0r_five_over_sqrt_21, 1); + y0r_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0r, SEVEN_OVER_SQRT_42); + y0r_seven_over_sqrt_21 = _mm256_slli_epi16(y0r_seven_over_sqrt_21, 2); // Q2.14 + + y0i_one_over_sqrt_21 = _mm256_mulhi_epi16(y0i, ONE_OVER_SQRT_42); + y0i_three_over_sqrt_21 = _mm256_mulhi_epi16(y0i, THREE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_mulhi_epi16(y0i, FIVE_OVER_SQRT_42); + y0i_five_over_sqrt_21 = _mm256_slli_epi16(y0i_five_over_sqrt_21, 1); + y0i_seven_over_sqrt_21 = _mm256_mulhi_epi16(y0i, SEVEN_OVER_SQRT_42); + y0i_seven_over_sqrt_21 = _mm256_slli_epi16(y0i_seven_over_sqrt_21, 2); // Q2.14 + + + y0_p_7_1 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_7_3 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_7_5 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_7_7 = _mm256_adds_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_5_1 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_5_3 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_5_5 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_5_7 = _mm256_adds_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_3_1 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_3_3 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_3_5 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_3_7 = _mm256_adds_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_p_1_1 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_p_1_3 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_p_1_5 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_p_1_7 = _mm256_adds_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + + y0_m_1_1 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_1_3 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_1_5 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_1_7 = _mm256_subs_epi16(y0r_one_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_3_1 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_3_3 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_3_5 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_3_7 = _mm256_subs_epi16(y0r_three_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_5_1 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_5_3 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_5_5 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_5_7 = _mm256_subs_epi16(y0r_five_over_sqrt_21, y0i_seven_over_sqrt_21); + y0_m_7_1 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_one_over_sqrt_21); + y0_m_7_3 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_three_over_sqrt_21); + y0_m_7_5 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_five_over_sqrt_21); + y0_m_7_7 = _mm256_subs_epi16(y0r_seven_over_sqrt_21, y0i_seven_over_sqrt_21); + + // Detection of interference term + 
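/* The three vectors prepared below act as decision thresholds: ch_mag_int carries
   the interfering channel magnitude scaled by 4/sqrt(42) (see the header comment),
   so after the arithmetic shift and the addition they correspond to 2x, 4x and 6x
   the magnitude over sqrt(42).  interference_abs_64qam_epi16() appears to slice
   each |psi| value against these thresholds and substitute the nearest interfering
   64-QAM amplitude {1,3,5,7}/sqrt(2*42), i.e. the four constants passed as its
   last arguments, into the a_r_* / a_i_* outputs.  A scalar sketch of that assumed
   behaviour (detect_interf_amp is a hypothetical helper, not part of this patch):

     int16_t detect_interf_amp(int16_t psi, int16_t t2, int16_t t4, int16_t t6,
                               int16_t a1, int16_t a3, int16_t a5, int16_t a7)
     {
       if (psi < t2) return a1;   // below 2x threshold: amplitude 1/sqrt(2*42)
       if (psi < t4) return a3;   // below 4x threshold: amplitude 3/sqrt(2*42)
       if (psi < t6) return a5;   // below 6x threshold: amplitude 5/sqrt(2*42)
       return a7;                 // otherwise:          amplitude 7/sqrt(2*42)
     }
*/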
ch_mag_int_with_sigma2 = _mm256_srai_epi16(ch_mag_int, 1); // *2 + two_ch_mag_int_with_sigma2 = ch_mag_int; // *4 + three_ch_mag_int_with_sigma2 = _mm256_adds_epi16(ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2); // *6 + + interference_abs_64qam_epi16(psi_r_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, 
FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_r_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m5, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_r_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_r_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_r_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + interference_abs_64qam_epi16(psi_i_p7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, 
FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, 
a_i_p1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_p1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_p1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m1_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m1_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m3, ch_mag_int_with_sigma2, 
two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m3_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m3_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m5_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m5_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_p1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_p1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m1, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m1, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + 
interference_abs_64qam_epi16(psi_i_m7_m3, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m3, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m5, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m5, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + interference_abs_64qam_epi16(psi_i_m7_m7, ch_mag_int_with_sigma2, two_ch_mag_int_with_sigma2, three_ch_mag_int_with_sigma2, a_i_m7_m7, ONE_OVER_SQRT_2_42, THREE_OVER_SQRT_2_42, FIVE_OVER_SQRT_2_42, + SEVEN_OVER_SQRT_2_42); + + // Calculation of a group of two terms in the bit metric involving product of psi and interference + prodsum_psi_a_epi16(psi_r_p7_p7, a_r_p7_p7, psi_i_p7_p7, a_i_p7_p7, psi_a_p7_p7); + prodsum_psi_a_epi16(psi_r_p7_p5, a_r_p7_p5, psi_i_p7_p5, a_i_p7_p5, psi_a_p7_p5); + prodsum_psi_a_epi16(psi_r_p7_p3, a_r_p7_p3, psi_i_p7_p3, a_i_p7_p3, psi_a_p7_p3); + prodsum_psi_a_epi16(psi_r_p7_p1, a_r_p7_p1, psi_i_p7_p1, a_i_p7_p1, psi_a_p7_p1); + prodsum_psi_a_epi16(psi_r_p7_m1, a_r_p7_m1, psi_i_p7_m1, a_i_p7_m1, psi_a_p7_m1); + prodsum_psi_a_epi16(psi_r_p7_m3, a_r_p7_m3, psi_i_p7_m3, a_i_p7_m3, psi_a_p7_m3); + prodsum_psi_a_epi16(psi_r_p7_m5, a_r_p7_m5, psi_i_p7_m5, a_i_p7_m5, psi_a_p7_m5); + prodsum_psi_a_epi16(psi_r_p7_m7, a_r_p7_m7, psi_i_p7_m7, a_i_p7_m7, psi_a_p7_m7); + prodsum_psi_a_epi16(psi_r_p5_p7, a_r_p5_p7, psi_i_p5_p7, a_i_p5_p7, psi_a_p5_p7); + prodsum_psi_a_epi16(psi_r_p5_p5, a_r_p5_p5, psi_i_p5_p5, a_i_p5_p5, psi_a_p5_p5); + prodsum_psi_a_epi16(psi_r_p5_p3, a_r_p5_p3, psi_i_p5_p3, a_i_p5_p3, psi_a_p5_p3); + prodsum_psi_a_epi16(psi_r_p5_p1, a_r_p5_p1, psi_i_p5_p1, a_i_p5_p1, psi_a_p5_p1); + prodsum_psi_a_epi16(psi_r_p5_m1, a_r_p5_m1, psi_i_p5_m1, a_i_p5_m1, psi_a_p5_m1); + prodsum_psi_a_epi16(psi_r_p5_m3, a_r_p5_m3, psi_i_p5_m3, a_i_p5_m3, psi_a_p5_m3); + prodsum_psi_a_epi16(psi_r_p5_m5, a_r_p5_m5, psi_i_p5_m5, a_i_p5_m5, psi_a_p5_m5); + prodsum_psi_a_epi16(psi_r_p5_m7, a_r_p5_m7, psi_i_p5_m7, a_i_p5_m7, psi_a_p5_m7); + prodsum_psi_a_epi16(psi_r_p3_p7, a_r_p3_p7, psi_i_p3_p7, a_i_p3_p7, psi_a_p3_p7); + prodsum_psi_a_epi16(psi_r_p3_p5, a_r_p3_p5, psi_i_p3_p5, a_i_p3_p5, psi_a_p3_p5); + prodsum_psi_a_epi16(psi_r_p3_p3, a_r_p3_p3, psi_i_p3_p3, a_i_p3_p3, psi_a_p3_p3); + prodsum_psi_a_epi16(psi_r_p3_p1, a_r_p3_p1, psi_i_p3_p1, a_i_p3_p1, psi_a_p3_p1); + prodsum_psi_a_epi16(psi_r_p3_m1, a_r_p3_m1, psi_i_p3_m1, a_i_p3_m1, psi_a_p3_m1); + prodsum_psi_a_epi16(psi_r_p3_m3, a_r_p3_m3, psi_i_p3_m3, a_i_p3_m3, psi_a_p3_m3); + prodsum_psi_a_epi16(psi_r_p3_m5, a_r_p3_m5, psi_i_p3_m5, a_i_p3_m5, psi_a_p3_m5); + prodsum_psi_a_epi16(psi_r_p3_m7, a_r_p3_m7, psi_i_p3_m7, a_i_p3_m7, psi_a_p3_m7); + prodsum_psi_a_epi16(psi_r_p1_p7, a_r_p1_p7, psi_i_p1_p7, a_i_p1_p7, psi_a_p1_p7); + prodsum_psi_a_epi16(psi_r_p1_p5, a_r_p1_p5, psi_i_p1_p5, a_i_p1_p5, psi_a_p1_p5); + prodsum_psi_a_epi16(psi_r_p1_p3, a_r_p1_p3, psi_i_p1_p3, a_i_p1_p3, psi_a_p1_p3); + prodsum_psi_a_epi16(psi_r_p1_p1, a_r_p1_p1, psi_i_p1_p1, a_i_p1_p1, psi_a_p1_p1); + prodsum_psi_a_epi16(psi_r_p1_m1, a_r_p1_m1, psi_i_p1_m1, a_i_p1_m1, psi_a_p1_m1); + prodsum_psi_a_epi16(psi_r_p1_m3, a_r_p1_m3, psi_i_p1_m3, a_i_p1_m3, psi_a_p1_m3); + prodsum_psi_a_epi16(psi_r_p1_m5, a_r_p1_m5, psi_i_p1_m5, a_i_p1_m5, psi_a_p1_m5); + prodsum_psi_a_epi16(psi_r_p1_m7, a_r_p1_m7, psi_i_p1_m7, a_i_p1_m7, psi_a_p1_m7); + prodsum_psi_a_epi16(psi_r_m1_p7, a_r_m1_p7, psi_i_m1_p7, a_i_m1_p7, psi_a_m1_p7); + 
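+  // prodsum_psi_a_epi16(psi_r, a_r, psi_i, a_i, out) presumably accumulates the fixed-point
+  // product-sum psi_r*a_r + psi_i*a_i, i.e. the cross term between the rotated observation
+  // (psi) and the interference hypothesis (a) that appears in each candidate bit metric.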
prodsum_psi_a_epi16(psi_r_m1_p5, a_r_m1_p5, psi_i_m1_p5, a_i_m1_p5, psi_a_m1_p5); + prodsum_psi_a_epi16(psi_r_m1_p3, a_r_m1_p3, psi_i_m1_p3, a_i_m1_p3, psi_a_m1_p3); + prodsum_psi_a_epi16(psi_r_m1_p1, a_r_m1_p1, psi_i_m1_p1, a_i_m1_p1, psi_a_m1_p1); + prodsum_psi_a_epi16(psi_r_m1_m1, a_r_m1_m1, psi_i_m1_m1, a_i_m1_m1, psi_a_m1_m1); + prodsum_psi_a_epi16(psi_r_m1_m3, a_r_m1_m3, psi_i_m1_m3, a_i_m1_m3, psi_a_m1_m3); + prodsum_psi_a_epi16(psi_r_m1_m5, a_r_m1_m5, psi_i_m1_m5, a_i_m1_m5, psi_a_m1_m5); + prodsum_psi_a_epi16(psi_r_m1_m7, a_r_m1_m7, psi_i_m1_m7, a_i_m1_m7, psi_a_m1_m7); + prodsum_psi_a_epi16(psi_r_m3_p7, a_r_m3_p7, psi_i_m3_p7, a_i_m3_p7, psi_a_m3_p7); + prodsum_psi_a_epi16(psi_r_m3_p5, a_r_m3_p5, psi_i_m3_p5, a_i_m3_p5, psi_a_m3_p5); + prodsum_psi_a_epi16(psi_r_m3_p3, a_r_m3_p3, psi_i_m3_p3, a_i_m3_p3, psi_a_m3_p3); + prodsum_psi_a_epi16(psi_r_m3_p1, a_r_m3_p1, psi_i_m3_p1, a_i_m3_p1, psi_a_m3_p1); + prodsum_psi_a_epi16(psi_r_m3_m1, a_r_m3_m1, psi_i_m3_m1, a_i_m3_m1, psi_a_m3_m1); + prodsum_psi_a_epi16(psi_r_m3_m3, a_r_m3_m3, psi_i_m3_m3, a_i_m3_m3, psi_a_m3_m3); + prodsum_psi_a_epi16(psi_r_m3_m5, a_r_m3_m5, psi_i_m3_m5, a_i_m3_m5, psi_a_m3_m5); + prodsum_psi_a_epi16(psi_r_m3_m7, a_r_m3_m7, psi_i_m3_m7, a_i_m3_m7, psi_a_m3_m7); + prodsum_psi_a_epi16(psi_r_m5_p7, a_r_m5_p7, psi_i_m5_p7, a_i_m5_p7, psi_a_m5_p7); + prodsum_psi_a_epi16(psi_r_m5_p5, a_r_m5_p5, psi_i_m5_p5, a_i_m5_p5, psi_a_m5_p5); + prodsum_psi_a_epi16(psi_r_m5_p3, a_r_m5_p3, psi_i_m5_p3, a_i_m5_p3, psi_a_m5_p3); + prodsum_psi_a_epi16(psi_r_m5_p1, a_r_m5_p1, psi_i_m5_p1, a_i_m5_p1, psi_a_m5_p1); + prodsum_psi_a_epi16(psi_r_m5_m1, a_r_m5_m1, psi_i_m5_m1, a_i_m5_m1, psi_a_m5_m1); + prodsum_psi_a_epi16(psi_r_m5_m3, a_r_m5_m3, psi_i_m5_m3, a_i_m5_m3, psi_a_m5_m3); + prodsum_psi_a_epi16(psi_r_m5_m5, a_r_m5_m5, psi_i_m5_m5, a_i_m5_m5, psi_a_m5_m5); + prodsum_psi_a_epi16(psi_r_m5_m7, a_r_m5_m7, psi_i_m5_m7, a_i_m5_m7, psi_a_m5_m7); + prodsum_psi_a_epi16(psi_r_m7_p7, a_r_m7_p7, psi_i_m7_p7, a_i_m7_p7, psi_a_m7_p7); + prodsum_psi_a_epi16(psi_r_m7_p5, a_r_m7_p5, psi_i_m7_p5, a_i_m7_p5, psi_a_m7_p5); + prodsum_psi_a_epi16(psi_r_m7_p3, a_r_m7_p3, psi_i_m7_p3, a_i_m7_p3, psi_a_m7_p3); + prodsum_psi_a_epi16(psi_r_m7_p1, a_r_m7_p1, psi_i_m7_p1, a_i_m7_p1, psi_a_m7_p1); + prodsum_psi_a_epi16(psi_r_m7_m1, a_r_m7_m1, psi_i_m7_m1, a_i_m7_m1, psi_a_m7_m1); + prodsum_psi_a_epi16(psi_r_m7_m3, a_r_m7_m3, psi_i_m7_m3, a_i_m7_m3, psi_a_m7_m3); + prodsum_psi_a_epi16(psi_r_m7_m5, a_r_m7_m5, psi_i_m7_m5, a_i_m7_m5, psi_a_m7_m5); + prodsum_psi_a_epi16(psi_r_m7_m7, a_r_m7_m7, psi_i_m7_m7, a_i_m7_m7, psi_a_m7_m7); + + // Multiply by sqrt(2) + psi_a_p7_p7 = _mm256_mulhi_epi16(psi_a_p7_p7, ONE_OVER_SQRT_2); + psi_a_p7_p7 = _mm256_slli_epi16(psi_a_p7_p7, 2); + psi_a_p7_p5 = _mm256_mulhi_epi16(psi_a_p7_p5, ONE_OVER_SQRT_2); + psi_a_p7_p5 = _mm256_slli_epi16(psi_a_p7_p5, 2); + psi_a_p7_p3 = _mm256_mulhi_epi16(psi_a_p7_p3, ONE_OVER_SQRT_2); + psi_a_p7_p3 = _mm256_slli_epi16(psi_a_p7_p3, 2); + psi_a_p7_p1 = _mm256_mulhi_epi16(psi_a_p7_p1, ONE_OVER_SQRT_2); + psi_a_p7_p1 = _mm256_slli_epi16(psi_a_p7_p1, 2); + psi_a_p7_m1 = _mm256_mulhi_epi16(psi_a_p7_m1, ONE_OVER_SQRT_2); + psi_a_p7_m1 = _mm256_slli_epi16(psi_a_p7_m1, 2); + psi_a_p7_m3 = _mm256_mulhi_epi16(psi_a_p7_m3, ONE_OVER_SQRT_2); + psi_a_p7_m3 = _mm256_slli_epi16(psi_a_p7_m3, 2); + psi_a_p7_m5 = _mm256_mulhi_epi16(psi_a_p7_m5, ONE_OVER_SQRT_2); + psi_a_p7_m5 = _mm256_slli_epi16(psi_a_p7_m5, 2); + psi_a_p7_m7 = _mm256_mulhi_epi16(psi_a_p7_m7, ONE_OVER_SQRT_2); + psi_a_p7_m7 = 
_mm256_slli_epi16(psi_a_p7_m7, 2); + psi_a_p5_p7 = _mm256_mulhi_epi16(psi_a_p5_p7, ONE_OVER_SQRT_2); + psi_a_p5_p7 = _mm256_slli_epi16(psi_a_p5_p7, 2); + psi_a_p5_p5 = _mm256_mulhi_epi16(psi_a_p5_p5, ONE_OVER_SQRT_2); + psi_a_p5_p5 = _mm256_slli_epi16(psi_a_p5_p5, 2); + psi_a_p5_p3 = _mm256_mulhi_epi16(psi_a_p5_p3, ONE_OVER_SQRT_2); + psi_a_p5_p3 = _mm256_slli_epi16(psi_a_p5_p3, 2); + psi_a_p5_p1 = _mm256_mulhi_epi16(psi_a_p5_p1, ONE_OVER_SQRT_2); + psi_a_p5_p1 = _mm256_slli_epi16(psi_a_p5_p1, 2); + psi_a_p5_m1 = _mm256_mulhi_epi16(psi_a_p5_m1, ONE_OVER_SQRT_2); + psi_a_p5_m1 = _mm256_slli_epi16(psi_a_p5_m1, 2); + psi_a_p5_m3 = _mm256_mulhi_epi16(psi_a_p5_m3, ONE_OVER_SQRT_2); + psi_a_p5_m3 = _mm256_slli_epi16(psi_a_p5_m3, 2); + psi_a_p5_m5 = _mm256_mulhi_epi16(psi_a_p5_m5, ONE_OVER_SQRT_2); + psi_a_p5_m5 = _mm256_slli_epi16(psi_a_p5_m5, 2); + psi_a_p5_m7 = _mm256_mulhi_epi16(psi_a_p5_m7, ONE_OVER_SQRT_2); + psi_a_p5_m7 = _mm256_slli_epi16(psi_a_p5_m7, 2); + psi_a_p3_p7 = _mm256_mulhi_epi16(psi_a_p3_p7, ONE_OVER_SQRT_2); + psi_a_p3_p7 = _mm256_slli_epi16(psi_a_p3_p7, 2); + psi_a_p3_p5 = _mm256_mulhi_epi16(psi_a_p3_p5, ONE_OVER_SQRT_2); + psi_a_p3_p5 = _mm256_slli_epi16(psi_a_p3_p5, 2); + psi_a_p3_p3 = _mm256_mulhi_epi16(psi_a_p3_p3, ONE_OVER_SQRT_2); + psi_a_p3_p3 = _mm256_slli_epi16(psi_a_p3_p3, 2); + psi_a_p3_p1 = _mm256_mulhi_epi16(psi_a_p3_p1, ONE_OVER_SQRT_2); + psi_a_p3_p1 = _mm256_slli_epi16(psi_a_p3_p1, 2); + psi_a_p3_m1 = _mm256_mulhi_epi16(psi_a_p3_m1, ONE_OVER_SQRT_2); + psi_a_p3_m1 = _mm256_slli_epi16(psi_a_p3_m1, 2); + psi_a_p3_m3 = _mm256_mulhi_epi16(psi_a_p3_m3, ONE_OVER_SQRT_2); + psi_a_p3_m3 = _mm256_slli_epi16(psi_a_p3_m3, 2); + psi_a_p3_m5 = _mm256_mulhi_epi16(psi_a_p3_m5, ONE_OVER_SQRT_2); + psi_a_p3_m5 = _mm256_slli_epi16(psi_a_p3_m5, 2); + psi_a_p3_m7 = _mm256_mulhi_epi16(psi_a_p3_m7, ONE_OVER_SQRT_2); + psi_a_p3_m7 = _mm256_slli_epi16(psi_a_p3_m7, 2); + psi_a_p1_p7 = _mm256_mulhi_epi16(psi_a_p1_p7, ONE_OVER_SQRT_2); + psi_a_p1_p7 = _mm256_slli_epi16(psi_a_p1_p7, 2); + psi_a_p1_p5 = _mm256_mulhi_epi16(psi_a_p1_p5, ONE_OVER_SQRT_2); + psi_a_p1_p5 = _mm256_slli_epi16(psi_a_p1_p5, 2); + psi_a_p1_p3 = _mm256_mulhi_epi16(psi_a_p1_p3, ONE_OVER_SQRT_2); + psi_a_p1_p3 = _mm256_slli_epi16(psi_a_p1_p3, 2); + psi_a_p1_p1 = _mm256_mulhi_epi16(psi_a_p1_p1, ONE_OVER_SQRT_2); + psi_a_p1_p1 = _mm256_slli_epi16(psi_a_p1_p1, 2); + psi_a_p1_m1 = _mm256_mulhi_epi16(psi_a_p1_m1, ONE_OVER_SQRT_2); + psi_a_p1_m1 = _mm256_slli_epi16(psi_a_p1_m1, 2); + psi_a_p1_m3 = _mm256_mulhi_epi16(psi_a_p1_m3, ONE_OVER_SQRT_2); + psi_a_p1_m3 = _mm256_slli_epi16(psi_a_p1_m3, 2); + psi_a_p1_m5 = _mm256_mulhi_epi16(psi_a_p1_m5, ONE_OVER_SQRT_2); + psi_a_p1_m5 = _mm256_slli_epi16(psi_a_p1_m5, 2); + psi_a_p1_m7 = _mm256_mulhi_epi16(psi_a_p1_m7, ONE_OVER_SQRT_2); + psi_a_p1_m7 = _mm256_slli_epi16(psi_a_p1_m7, 2); + psi_a_m1_p7 = _mm256_mulhi_epi16(psi_a_m1_p7, ONE_OVER_SQRT_2); + psi_a_m1_p7 = _mm256_slli_epi16(psi_a_m1_p7, 2); + psi_a_m1_p5 = _mm256_mulhi_epi16(psi_a_m1_p5, ONE_OVER_SQRT_2); + psi_a_m1_p5 = _mm256_slli_epi16(psi_a_m1_p5, 2); + psi_a_m1_p3 = _mm256_mulhi_epi16(psi_a_m1_p3, ONE_OVER_SQRT_2); + psi_a_m1_p3 = _mm256_slli_epi16(psi_a_m1_p3, 2); + psi_a_m1_p1 = _mm256_mulhi_epi16(psi_a_m1_p1, ONE_OVER_SQRT_2); + psi_a_m1_p1 = _mm256_slli_epi16(psi_a_m1_p1, 2); + psi_a_m1_m1 = _mm256_mulhi_epi16(psi_a_m1_m1, ONE_OVER_SQRT_2); + psi_a_m1_m1 = _mm256_slli_epi16(psi_a_m1_m1, 2); + psi_a_m1_m3 = _mm256_mulhi_epi16(psi_a_m1_m3, ONE_OVER_SQRT_2); + psi_a_m1_m3 = _mm256_slli_epi16(psi_a_m1_m3, 2); + 
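+  // Fixed-point detail for this scaling block: _mm256_mulhi_epi16(x, C) keeps the high 16 bits
+  // of the 32-bit product, i.e. (x*C) >> 16. Assuming ONE_OVER_SQRT_2 holds 23170 ~ 2^15/sqrt(2)
+  // (as in the SSE helpers), mulhi yields x/(2*sqrt(2)) and the following slli by 2 restores the
+  // scale, so each instruction pair multiplies psi_a_* by sqrt(2) without intermediate overflow.
+  // Example: x = 1000 -> (1000*23170) >> 16 = 353, and 353 << 2 = 1412 ~ 1000*sqrt(2).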
psi_a_m1_m5 = _mm256_mulhi_epi16(psi_a_m1_m5, ONE_OVER_SQRT_2); + psi_a_m1_m5 = _mm256_slli_epi16(psi_a_m1_m5, 2); + psi_a_m1_m7 = _mm256_mulhi_epi16(psi_a_m1_m7, ONE_OVER_SQRT_2); + psi_a_m1_m7 = _mm256_slli_epi16(psi_a_m1_m7, 2); + psi_a_m3_p7 = _mm256_mulhi_epi16(psi_a_m3_p7, ONE_OVER_SQRT_2); + psi_a_m3_p7 = _mm256_slli_epi16(psi_a_m3_p7, 2); + psi_a_m3_p5 = _mm256_mulhi_epi16(psi_a_m3_p5, ONE_OVER_SQRT_2); + psi_a_m3_p5 = _mm256_slli_epi16(psi_a_m3_p5, 2); + psi_a_m3_p3 = _mm256_mulhi_epi16(psi_a_m3_p3, ONE_OVER_SQRT_2); + psi_a_m3_p3 = _mm256_slli_epi16(psi_a_m3_p3, 2); + psi_a_m3_p1 = _mm256_mulhi_epi16(psi_a_m3_p1, ONE_OVER_SQRT_2); + psi_a_m3_p1 = _mm256_slli_epi16(psi_a_m3_p1, 2); + psi_a_m3_m1 = _mm256_mulhi_epi16(psi_a_m3_m1, ONE_OVER_SQRT_2); + psi_a_m3_m1 = _mm256_slli_epi16(psi_a_m3_m1, 2); + psi_a_m3_m3 = _mm256_mulhi_epi16(psi_a_m3_m3, ONE_OVER_SQRT_2); + psi_a_m3_m3 = _mm256_slli_epi16(psi_a_m3_m3, 2); + psi_a_m3_m5 = _mm256_mulhi_epi16(psi_a_m3_m5, ONE_OVER_SQRT_2); + psi_a_m3_m5 = _mm256_slli_epi16(psi_a_m3_m5, 2); + psi_a_m3_m7 = _mm256_mulhi_epi16(psi_a_m3_m7, ONE_OVER_SQRT_2); + psi_a_m3_m7 = _mm256_slli_epi16(psi_a_m3_m7, 2); + psi_a_m5_p7 = _mm256_mulhi_epi16(psi_a_m5_p7, ONE_OVER_SQRT_2); + psi_a_m5_p7 = _mm256_slli_epi16(psi_a_m5_p7, 2); + psi_a_m5_p5 = _mm256_mulhi_epi16(psi_a_m5_p5, ONE_OVER_SQRT_2); + psi_a_m5_p5 = _mm256_slli_epi16(psi_a_m5_p5, 2); + psi_a_m5_p3 = _mm256_mulhi_epi16(psi_a_m5_p3, ONE_OVER_SQRT_2); + psi_a_m5_p3 = _mm256_slli_epi16(psi_a_m5_p3, 2); + psi_a_m5_p1 = _mm256_mulhi_epi16(psi_a_m5_p1, ONE_OVER_SQRT_2); + psi_a_m5_p1 = _mm256_slli_epi16(psi_a_m5_p1, 2); + psi_a_m5_m1 = _mm256_mulhi_epi16(psi_a_m5_m1, ONE_OVER_SQRT_2); + psi_a_m5_m1 = _mm256_slli_epi16(psi_a_m5_m1, 2); + psi_a_m5_m3 = _mm256_mulhi_epi16(psi_a_m5_m3, ONE_OVER_SQRT_2); + psi_a_m5_m3 = _mm256_slli_epi16(psi_a_m5_m3, 2); + psi_a_m5_m5 = _mm256_mulhi_epi16(psi_a_m5_m5, ONE_OVER_SQRT_2); + psi_a_m5_m5 = _mm256_slli_epi16(psi_a_m5_m5, 2); + psi_a_m5_m7 = _mm256_mulhi_epi16(psi_a_m5_m7, ONE_OVER_SQRT_2); + psi_a_m5_m7 = _mm256_slli_epi16(psi_a_m5_m7, 2); + psi_a_m7_p7 = _mm256_mulhi_epi16(psi_a_m7_p7, ONE_OVER_SQRT_2); + psi_a_m7_p7 = _mm256_slli_epi16(psi_a_m7_p7, 2); + psi_a_m7_p5 = _mm256_mulhi_epi16(psi_a_m7_p5, ONE_OVER_SQRT_2); + psi_a_m7_p5 = _mm256_slli_epi16(psi_a_m7_p5, 2); + psi_a_m7_p3 = _mm256_mulhi_epi16(psi_a_m7_p3, ONE_OVER_SQRT_2); + psi_a_m7_p3 = _mm256_slli_epi16(psi_a_m7_p3, 2); + psi_a_m7_p1 = _mm256_mulhi_epi16(psi_a_m7_p1, ONE_OVER_SQRT_2); + psi_a_m7_p1 = _mm256_slli_epi16(psi_a_m7_p1, 2); + psi_a_m7_m1 = _mm256_mulhi_epi16(psi_a_m7_m1, ONE_OVER_SQRT_2); + psi_a_m7_m1 = _mm256_slli_epi16(psi_a_m7_m1, 2); + psi_a_m7_m3 = _mm256_mulhi_epi16(psi_a_m7_m3, ONE_OVER_SQRT_2); + psi_a_m7_m3 = _mm256_slli_epi16(psi_a_m7_m3, 2); + psi_a_m7_m5 = _mm256_mulhi_epi16(psi_a_m7_m5, ONE_OVER_SQRT_2); + psi_a_m7_m5 = _mm256_slli_epi16(psi_a_m7_m5, 2); + psi_a_m7_m7 = _mm256_mulhi_epi16(psi_a_m7_m7, ONE_OVER_SQRT_2); + psi_a_m7_m7 = _mm256_slli_epi16(psi_a_m7_m7, 2); + + // Calculation of a group of two terms in the bit metric involving squares of interference + square_a_64qam_epi16(a_r_p7_p7, a_i_p7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p7); + square_a_64qam_epi16(a_r_p7_p5, a_i_p7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p5); + square_a_64qam_epi16(a_r_p7_p3, a_i_p7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p3); + square_a_64qam_epi16(a_r_p7_p1, a_i_p7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_p1); + square_a_64qam_epi16(a_r_p7_m1, a_i_p7_m1, ch_mag_int, 
SQRT_42_OVER_FOUR, a_sq_p7_m1); + square_a_64qam_epi16(a_r_p7_m3, a_i_p7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m3); + square_a_64qam_epi16(a_r_p7_m5, a_i_p7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m5); + square_a_64qam_epi16(a_r_p7_m7, a_i_p7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p7_m7); + square_a_64qam_epi16(a_r_p5_p7, a_i_p5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p7); + square_a_64qam_epi16(a_r_p5_p5, a_i_p5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p5); + square_a_64qam_epi16(a_r_p5_p3, a_i_p5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p3); + square_a_64qam_epi16(a_r_p5_p1, a_i_p5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_p1); + square_a_64qam_epi16(a_r_p5_m1, a_i_p5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m1); + square_a_64qam_epi16(a_r_p5_m3, a_i_p5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m3); + square_a_64qam_epi16(a_r_p5_m5, a_i_p5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m5); + square_a_64qam_epi16(a_r_p5_m7, a_i_p5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p5_m7); + square_a_64qam_epi16(a_r_p3_p7, a_i_p3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p7); + square_a_64qam_epi16(a_r_p3_p5, a_i_p3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p5); + square_a_64qam_epi16(a_r_p3_p3, a_i_p3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p3); + square_a_64qam_epi16(a_r_p3_p1, a_i_p3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_p1); + square_a_64qam_epi16(a_r_p3_m1, a_i_p3_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m1); + square_a_64qam_epi16(a_r_p3_m3, a_i_p3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m3); + square_a_64qam_epi16(a_r_p3_m5, a_i_p3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m5); + square_a_64qam_epi16(a_r_p3_m7, a_i_p3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p3_m7); + square_a_64qam_epi16(a_r_p1_p7, a_i_p1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p7); + square_a_64qam_epi16(a_r_p1_p5, a_i_p1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p5); + square_a_64qam_epi16(a_r_p1_p3, a_i_p1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p3); + square_a_64qam_epi16(a_r_p1_p1, a_i_p1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_p1); + square_a_64qam_epi16(a_r_p1_m1, a_i_p1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m1); + square_a_64qam_epi16(a_r_p1_m3, a_i_p1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m3); + square_a_64qam_epi16(a_r_p1_m5, a_i_p1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m5); + square_a_64qam_epi16(a_r_p1_m7, a_i_p1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_p1_m7); + square_a_64qam_epi16(a_r_m1_p7, a_i_m1_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p7); + square_a_64qam_epi16(a_r_m1_p5, a_i_m1_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p5); + square_a_64qam_epi16(a_r_m1_p3, a_i_m1_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p3); + square_a_64qam_epi16(a_r_m1_p1, a_i_m1_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_p1); + square_a_64qam_epi16(a_r_m1_m1, a_i_m1_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m1); + square_a_64qam_epi16(a_r_m1_m3, a_i_m1_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m3); + square_a_64qam_epi16(a_r_m1_m5, a_i_m1_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m5); + square_a_64qam_epi16(a_r_m1_m7, a_i_m1_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m1_m7); + square_a_64qam_epi16(a_r_m3_p7, a_i_m3_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p7); + square_a_64qam_epi16(a_r_m3_p5, a_i_m3_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p5); + square_a_64qam_epi16(a_r_m3_p3, a_i_m3_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p3); + square_a_64qam_epi16(a_r_m3_p1, a_i_m3_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_p1); + square_a_64qam_epi16(a_r_m3_m1, a_i_m3_m1, 
ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m1); + square_a_64qam_epi16(a_r_m3_m3, a_i_m3_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m3); + square_a_64qam_epi16(a_r_m3_m5, a_i_m3_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m5); + square_a_64qam_epi16(a_r_m3_m7, a_i_m3_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m3_m7); + square_a_64qam_epi16(a_r_m5_p7, a_i_m5_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p7); + square_a_64qam_epi16(a_r_m5_p5, a_i_m5_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p5); + square_a_64qam_epi16(a_r_m5_p3, a_i_m5_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p3); + square_a_64qam_epi16(a_r_m5_p1, a_i_m5_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_p1); + square_a_64qam_epi16(a_r_m5_m1, a_i_m5_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m1); + square_a_64qam_epi16(a_r_m5_m3, a_i_m5_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m3); + square_a_64qam_epi16(a_r_m5_m5, a_i_m5_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m5); + square_a_64qam_epi16(a_r_m5_m7, a_i_m5_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m5_m7); + square_a_64qam_epi16(a_r_m7_p7, a_i_m7_p7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p7); + square_a_64qam_epi16(a_r_m7_p5, a_i_m7_p5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p5); + square_a_64qam_epi16(a_r_m7_p3, a_i_m7_p3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p3); + square_a_64qam_epi16(a_r_m7_p1, a_i_m7_p1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_p1); + square_a_64qam_epi16(a_r_m7_m1, a_i_m7_m1, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m1); + square_a_64qam_epi16(a_r_m7_m3, a_i_m7_m3, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m3); + square_a_64qam_epi16(a_r_m7_m5, a_i_m7_m5, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m5); + square_a_64qam_epi16(a_r_m7_m7, a_i_m7_m7, ch_mag_int, SQRT_42_OVER_FOUR, a_sq_m7_m7); + + // Computing different multiples of ||h0||^2 + // x=1, y=1 + ch_mag_2_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,ONE_OVER_FOUR_SQRT_42); + ch_mag_2_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_2_over_42_with_sigma2,1); + // x=1, y=3 + ch_mag_10_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FIVE_OVER_FOUR_SQRT_42); + ch_mag_10_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_10_over_42_with_sigma2,1); + // x=1, x=5 + ch_mag_26_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTEEN_OVER_FOUR_SQRT_42); + ch_mag_26_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_26_over_42_with_sigma2,1); + // x=1, y=7 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=3, y=3 + ch_mag_18_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,NINE_OVER_FOUR_SQRT_42); + ch_mag_18_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_18_over_42_with_sigma2,1); + // x=3, y=5 + ch_mag_34_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,SEVENTEEN_OVER_FOUR_SQRT_42); + ch_mag_34_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_34_over_42_with_sigma2,1); + // x=3, y=7 + ch_mag_58_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYNINE_OVER_FOUR_SQRT_42); + ch_mag_58_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_58_over_42_with_sigma2,2); + // x=5, y=5 + ch_mag_50_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,TWENTYFIVE_OVER_FOUR_SQRT_42); + ch_mag_50_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_50_over_42_with_sigma2,1); + // x=5, y=7 + ch_mag_74_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,THIRTYSEVEN_OVER_FOUR_SQRT_42); + ch_mag_74_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_74_over_42_with_sigma2,2); + // x=7, y=7 + 
ch_mag_98_over_42_with_sigma2 = _mm256_mulhi_epi16(ch_mag_des,FORTYNINE_OVER_FOUR_SQRT_42); + ch_mag_98_over_42_with_sigma2 = _mm256_slli_epi16(ch_mag_98_over_42_with_sigma2,2); + + // Computing Metrics + xmm0 = _mm256_subs_epi16(psi_a_p7_p7, a_sq_p7_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_7); + bit_met_p7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p5, a_sq_p7_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_5); + bit_met_p7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p3, a_sq_p7_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_3); + bit_met_p7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_p1, a_sq_p7_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_7_1); + bit_met_p7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m1, a_sq_p7_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_1); + bit_met_p7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m3, a_sq_p7_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_3); + bit_met_p7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m5, a_sq_p7_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_5); + bit_met_p7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p7_m7, a_sq_p7_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_7_7); + bit_met_p7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p7, a_sq_p5_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_7); + bit_met_p5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p5, a_sq_p5_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_5); + bit_met_p5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p3, a_sq_p5_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_3); + bit_met_p5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_p1, a_sq_p5_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_5_1); + bit_met_p5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m1, a_sq_p5_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_1); + bit_met_p5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m3, a_sq_p5_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_3); + bit_met_p5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m5, a_sq_p5_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_5); + bit_met_p5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p5_m7, a_sq_p5_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_5_7); + bit_met_p5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p7, a_sq_p3_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_7); + bit_met_p3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p5, a_sq_p3_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_5); + bit_met_p3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p3, a_sq_p3_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_3); + bit_met_p3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_p1, a_sq_p3_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_3_1); + 
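+  // Pattern for every metric in this block:
+  //   bit_met_x_y = (psi_a_x_y - a_sq_x_y) + y0_{p|m}_x_y - ch_mag_E_over_42_with_sigma2,
+  // i.e. the max-log metric for the hypothesis that the desired stream sent (x + jy)/sqrt(42):
+  // the desired-stream correlation term, plus the interference cancellation terms computed above,
+  // minus the point energy E = x^2 + y^2 scaled by ||h0||^2/42 (7^2+7^2 = 98, 7^2+5^2 = 74, ...).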
bit_met_p3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m1, a_sq_p3_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_1); + bit_met_p3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m3, a_sq_p3_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_3); + bit_met_p3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m5, a_sq_p3_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_5); + bit_met_p3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p3_m7, a_sq_p3_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_3_7); + bit_met_p3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p7, a_sq_p1_p7); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_7); + bit_met_p1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p5, a_sq_p1_p5); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_5); + bit_met_p1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p3, a_sq_p1_p3); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_3); + bit_met_p1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_p1, a_sq_p1_p1); + xmm1 = _mm256_adds_epi16(xmm0, y0_p_1_1); + bit_met_p1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m1, a_sq_p1_m1); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_1); + bit_met_p1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m3, a_sq_p1_m3); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_3); + bit_met_p1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m5, a_sq_p1_m5); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_5); + bit_met_p1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_p1_m7, a_sq_p1_m7); + xmm1 = _mm256_adds_epi16(xmm0, y0_m_1_7); + bit_met_p1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + + xmm0 = _mm256_subs_epi16(psi_a_m1_p7, a_sq_m1_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_7); + bit_met_m1_p7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p5, a_sq_m1_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_5); + bit_met_m1_p5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p3, a_sq_m1_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_3); + bit_met_m1_p3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_p1, a_sq_m1_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_1_1); + bit_met_m1_p1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m1, a_sq_m1_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_1); + bit_met_m1_m1 = _mm256_subs_epi16(xmm1, ch_mag_2_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m3, a_sq_m1_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_3); + bit_met_m1_m3 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m5, a_sq_m1_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_5); + bit_met_m1_m5 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m1_m7, a_sq_m1_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_1_7); + bit_met_m1_m7 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p7, a_sq_m3_p7); + xmm1 = 
_mm256_subs_epi16(xmm0, y0_m_3_7); + bit_met_m3_p7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p5, a_sq_m3_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_5); + bit_met_m3_p5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p3, a_sq_m3_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_3); + bit_met_m3_p3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_p1, a_sq_m3_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_3_1); + bit_met_m3_p1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m1, a_sq_m3_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_1); + bit_met_m3_m1 = _mm256_subs_epi16(xmm1, ch_mag_10_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m3, a_sq_m3_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_3); + bit_met_m3_m3 = _mm256_subs_epi16(xmm1, ch_mag_18_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m5, a_sq_m3_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_5); + bit_met_m3_m5 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m3_m7, a_sq_m3_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_3_7); + bit_met_m3_m7 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p7, a_sq_m5_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_7); + bit_met_m5_p7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p5, a_sq_m5_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_5); + bit_met_m5_p5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p3, a_sq_m5_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_3); + bit_met_m5_p3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_p1, a_sq_m5_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_5_1); + bit_met_m5_p1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m1, a_sq_m5_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_1); + bit_met_m5_m1 = _mm256_subs_epi16(xmm1, ch_mag_26_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m3, a_sq_m5_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_3); + bit_met_m5_m3 = _mm256_subs_epi16(xmm1, ch_mag_34_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m5, a_sq_m5_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_5); + bit_met_m5_m5 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m5_m7, a_sq_m5_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_5_7); + bit_met_m5_m7 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p7, a_sq_m7_p7); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_7); + bit_met_m7_p7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p5, a_sq_m7_p5); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_5); + bit_met_m7_p5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p3, a_sq_m7_p3); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_3); + bit_met_m7_p3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_p1, a_sq_m7_p1); + xmm1 = _mm256_subs_epi16(xmm0, y0_m_7_1); + bit_met_m7_p1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m1, a_sq_m7_m1); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_1); + bit_met_m7_m1 = _mm256_subs_epi16(xmm1, ch_mag_50_over_42_with_sigma2); + xmm0 = 
_mm256_subs_epi16(psi_a_m7_m3, a_sq_m7_m3); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_3); + bit_met_m7_m3 = _mm256_subs_epi16(xmm1, ch_mag_58_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m5, a_sq_m7_m5); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_5); + bit_met_m7_m5 = _mm256_subs_epi16(xmm1, ch_mag_74_over_42_with_sigma2); + xmm0 = _mm256_subs_epi16(psi_a_m7_m7, a_sq_m7_m7); + xmm1 = _mm256_subs_epi16(xmm0, y0_p_7_7); + bit_met_m7_m7 = _mm256_subs_epi16(xmm1, ch_mag_98_over_42_with_sigma2); + + // Detection for 1st bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_m7_p7, bit_met_m7_p5); + xmm1 = _mm256_max_epi16(bit_met_m7_p3, bit_met_m7_p1); + xmm2 = _mm256_max_epi16(bit_met_m7_m1, bit_met_m7_m3); + xmm3 = _mm256_max_epi16(bit_met_m7_m5, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m5_p5); + xmm1 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m5_p1); + xmm2 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m5_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_p7, bit_met_m3_p5); + xmm1 = _mm256_max_epi16(bit_met_m3_p3, bit_met_m3_p1); + xmm2 = _mm256_max_epi16(bit_met_m3_m1, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m3_m5, bit_met_m3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m1_p5); + xmm1 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m1_m3); + xmm3 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p7_p5); + xmm1 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p7_p1); + xmm2 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p7_m3); + xmm3 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_p7, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p5_p3, bit_met_p5_p1); + xmm2 = _mm256_max_epi16(bit_met_p5_m1, bit_met_p5_m3); + xmm3 = _mm256_max_epi16(bit_met_p5_m5, bit_met_p5_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p3_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p3_p1); + xmm2 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p3_m3); + xmm3 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p3_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_p7, bit_met_p1_p5); + xmm1 = _mm256_max_epi16(bit_met_p1_p3, bit_met_p1_p1); + xmm2 = 
_mm256_max_epi16(bit_met_p1_m1, bit_met_p1_m3); + xmm3 = _mm256_max_epi16(bit_met_p1_m5, bit_met_p1_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 2nd bit (LTE mapping) + // bit = 1 + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + // bit = 0 + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = 
_mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1r = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 3rd bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = _mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2r = _mm256_subs_epi16(logmax_num_re0, 
logmax_den_re0); + + // Detection for 4th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m7_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y0i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + + // Detection for 5th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_m7_m7, bit_met_m7_m5); + xmm1 = _mm256_max_epi16(bit_met_m7_m3, bit_met_m7_m1); + xmm2 = 
_mm256_max_epi16(bit_met_m7_p1, bit_met_m7_p3); + xmm3 = _mm256_max_epi16(bit_met_m7_p5, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m1_m5); + xmm1 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m1_p3); + xmm3 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p1_m7, bit_met_p1_m5); + xmm1 = _mm256_max_epi16(bit_met_p1_m3, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_p1_p1, bit_met_p1_p3); + xmm3 = _mm256_max_epi16(bit_met_p1_p5, bit_met_p1_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p7_m5); + xmm1 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p7_m1); + xmm2 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p7_p3); + xmm3 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m5_m5); + xmm1 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m5_m1); + xmm2 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_m3_m7, bit_met_m3_m5); + xmm1 = _mm256_max_epi16(bit_met_m3_m3, bit_met_m3_m1); + xmm2 = _mm256_max_epi16(bit_met_m3_p1, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m3_p5, bit_met_m3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p3_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p3_m1); + xmm2 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p3_p3); + xmm3 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p3_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p5_m7, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p5_m3, bit_met_p5_m1); + xmm2 = _mm256_max_epi16(bit_met_p5_p1, bit_met_p5_p3); + xmm3 = _mm256_max_epi16(bit_met_p5_p5, bit_met_p5_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y1i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // Detection for 6th bit (LTE mapping) + xmm0 = _mm256_max_epi16(bit_met_p7_p7, bit_met_p5_p7); + xmm1 = _mm256_max_epi16(bit_met_p3_p7, bit_met_p1_p7); + xmm2 = _mm256_max_epi16(bit_met_m1_p7, bit_met_m3_p7); + xmm3 = _mm256_max_epi16(bit_met_m5_p7, bit_met_m7_p7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + 
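+  // Same max-log reduction as for the previous bits: the 64 candidate metrics split into 32
+  // hypotheses with the current bit equal to 0 (collected into logmax_num_re0) and 32 with the
+  // bit equal to 1 (logmax_den_re0); a tree of _mm256_max_epi16 calls reduces each half and the
+  // LLR is their saturating difference. Every 16-bit lane carries an independent resource
+  // element, so each pass of the loop produces LLRs for 16 REs in parallel.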
logmax_den_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p1, bit_met_p5_p1); + xmm1 = _mm256_max_epi16(bit_met_p3_p1, bit_met_p1_p1); + xmm2 = _mm256_max_epi16(bit_met_m1_p1, bit_met_m3_p1); + xmm3 = _mm256_max_epi16(bit_met_m5_p1, bit_met_m5_p1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m1, bit_met_p5_m1); + xmm1 = _mm256_max_epi16(bit_met_p3_m1, bit_met_p1_m1); + xmm2 = _mm256_max_epi16(bit_met_m1_m1, bit_met_m3_m1); + xmm3 = _mm256_max_epi16(bit_met_m5_m1, bit_met_m7_m1); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m7, bit_met_p5_m7); + xmm1 = _mm256_max_epi16(bit_met_p3_m7, bit_met_p1_m7); + xmm2 = _mm256_max_epi16(bit_met_m1_m7, bit_met_m3_m7); + xmm3 = _mm256_max_epi16(bit_met_m5_m7, bit_met_m7_m7); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm4); + logmax_den_re0 = _mm256_max_epi16(logmax_den_re0, xmm5); + + xmm0 = _mm256_max_epi16(bit_met_p7_m5, bit_met_p5_m5); + xmm1 = _mm256_max_epi16(bit_met_p3_m5, bit_met_p1_m5); + xmm2 = _mm256_max_epi16(bit_met_m1_m5, bit_met_m3_m5); + xmm3 = _mm256_max_epi16(bit_met_m5_m5, bit_met_m7_m5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(xmm4, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_m3, bit_met_p5_m3); + xmm1 = _mm256_max_epi16(bit_met_p3_m3, bit_met_p1_m3); + xmm2 = _mm256_max_epi16(bit_met_m1_m3, bit_met_m3_m3); + xmm3 = _mm256_max_epi16(bit_met_m5_m3, bit_met_m7_m3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p3, bit_met_p5_p3); + xmm1 = _mm256_max_epi16(bit_met_p3_p3, bit_met_p1_p3); + xmm2 = _mm256_max_epi16(bit_met_m1_p3, bit_met_m3_p3); + xmm3 = _mm256_max_epi16(bit_met_m5_p3, bit_met_m7_p3); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + xmm0 = _mm256_max_epi16(bit_met_p7_p5, bit_met_p5_p5); + xmm1 = _mm256_max_epi16(bit_met_p3_p5, bit_met_p1_p5); + xmm2 = _mm256_max_epi16(bit_met_m1_p5, bit_met_m3_p5); + xmm3 = _mm256_max_epi16(bit_met_m5_p5, bit_met_m7_p5); + xmm4 = _mm256_max_epi16(xmm0, xmm1); + xmm5 = _mm256_max_epi16(xmm2, xmm3); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm4); + logmax_num_re0 = _mm256_max_epi16(logmax_num_re0, xmm5); + + y2i = _mm256_subs_epi16(logmax_num_re0, logmax_den_re0); + + // map to output stream, difficult to do in SIMD since we have 6 16bit LLRs + // RE 1 + j = 48*i; + stream0_out[j + 0] = ((short *)&y0r)[0]; + stream0_out[j + 1] = ((short *)&y1r)[0]; + stream0_out[j + 2] = ((short *)&y2r)[0]; + stream0_out[j + 3] = ((short *)&y0i)[0]; + stream0_out[j + 4] = ((short *)&y1i)[0]; + stream0_out[j + 5] = ((short *)&y2i)[0]; + // RE 2 + stream0_out[j + 6] = ((short *)&y0r)[1]; + stream0_out[j + 7] = ((short *)&y1r)[1]; + stream0_out[j + 8] = ((short *)&y2r)[1]; + stream0_out[j + 9] = ((short *)&y0i)[1]; + 
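+  // Each of y0r, y1r, y2r, y0i, y1i, y2i holds one LLR per RE in its 16 lanes; the scalar stores
+  // de-interleave them into stream0_out as six consecutive 16-bit LLRs per RE, i.e. 96 outputs
+  // (j .. j+95) per pass of the loop.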
stream0_out[j + 10] = ((short *)&y1i)[1]; + stream0_out[j + 11] = ((short *)&y2i)[1]; + // RE 3 + stream0_out[j + 12] = ((short *)&y0r)[2]; + stream0_out[j + 13] = ((short *)&y1r)[2]; + stream0_out[j + 14] = ((short *)&y2r)[2]; + stream0_out[j + 15] = ((short *)&y0i)[2]; + stream0_out[j + 16] = ((short *)&y1i)[2]; + stream0_out[j + 17] = ((short *)&y2i)[2]; + // RE 4 + stream0_out[j + 18] = ((short *)&y0r)[3]; + stream0_out[j + 19] = ((short *)&y1r)[3]; + stream0_out[j + 20] = ((short *)&y2r)[3]; + stream0_out[j + 21] = ((short *)&y0i)[3]; + stream0_out[j + 22] = ((short *)&y1i)[3]; + stream0_out[j + 23] = ((short *)&y2i)[3]; + // RE 5 + stream0_out[j + 24] = ((short *)&y0r)[4]; + stream0_out[j + 25] = ((short *)&y1r)[4]; + stream0_out[j + 26] = ((short *)&y2r)[4]; + stream0_out[j + 27] = ((short *)&y0i)[4]; + stream0_out[j + 28] = ((short *)&y1i)[4]; + stream0_out[j + 29] = ((short *)&y2i)[4]; + // RE 6 + stream0_out[j + 30] = ((short *)&y0r)[5]; + stream0_out[j + 31] = ((short *)&y1r)[5]; + stream0_out[j + 32] = ((short *)&y2r)[5]; + stream0_out[j + 33] = ((short *)&y0i)[5]; + stream0_out[j + 34] = ((short *)&y1i)[5]; + stream0_out[j + 35] = ((short *)&y2i)[5]; + // RE 7 + stream0_out[j + 36] = ((short *)&y0r)[6]; + stream0_out[j + 37] = ((short *)&y1r)[6]; + stream0_out[j + 38] = ((short *)&y2r)[6]; + stream0_out[j + 39] = ((short *)&y0i)[6]; + stream0_out[j + 40] = ((short *)&y1i)[6]; + stream0_out[j + 41] = ((short *)&y2i)[6]; + // RE 8 + stream0_out[j + 42] = ((short *)&y0r)[7]; + stream0_out[j + 43] = ((short *)&y1r)[7]; + stream0_out[j + 44] = ((short *)&y2r)[7]; + stream0_out[j + 45] = ((short *)&y0i)[7]; + stream0_out[j + 46] = ((short *)&y1i)[7]; + stream0_out[j + 47] = ((short *)&y2i)[7]; + + // RE 9 + stream0_out[j + 48] = ((short *)&y0r)[8]; + stream0_out[j + 49] = ((short *)&y1r)[8]; + stream0_out[j + 50] = ((short *)&y2r)[8]; + stream0_out[j + 51] = ((short *)&y0i)[8]; + stream0_out[j + 52] = ((short *)&y1i)[8]; + stream0_out[j + 53] = ((short *)&y2i)[8]; + // RE 10 + stream0_out[j + 54] = ((short *)&y0r)[9]; + stream0_out[j + 55] = ((short *)&y1r)[9]; + stream0_out[j + 56] = ((short *)&y2r)[9]; + stream0_out[j + 57] = ((short *)&y0i)[9]; + stream0_out[j + 58] = ((short *)&y1i)[9]; + stream0_out[j + 59] = ((short *)&y2i)[9]; + // RE 11 + stream0_out[j + 60] = ((short *)&y0r)[10]; + stream0_out[j + 61] = ((short *)&y1r)[10]; + stream0_out[j + 62] = ((short *)&y2r)[10]; + stream0_out[j + 63] = ((short *)&y0i)[10]; + stream0_out[j + 64] = ((short *)&y1i)[10]; + stream0_out[j + 65] = ((short *)&y2i)[10]; + // RE 12 + stream0_out[j + 66] = ((short *)&y0r)[11]; + stream0_out[j + 67] = ((short *)&y1r)[11]; + stream0_out[j + 68] = ((short *)&y2r)[11]; + stream0_out[j + 69] = ((short *)&y0i)[11]; + stream0_out[j + 70] = ((short *)&y1i)[11]; + stream0_out[j + 71] = ((short *)&y2i)[11]; + // RE 13 + stream0_out[j + 72] = ((short *)&y0r)[12]; + stream0_out[j + 73] = ((short *)&y1r)[12]; + stream0_out[j + 74] = ((short *)&y2r)[12]; + stream0_out[j + 75] = ((short *)&y0i)[12]; + stream0_out[j + 76] = ((short *)&y1i)[12]; + stream0_out[j + 77] = ((short *)&y2i)[12]; + // RE 14 + stream0_out[j + 78] = ((short *)&y0r)[13]; + stream0_out[j + 79] = ((short *)&y1r)[13]; + stream0_out[j + 80] = ((short *)&y2r)[13]; + stream0_out[j + 81] = ((short *)&y0i)[13]; + stream0_out[j + 82] = ((short *)&y1i)[13]; + stream0_out[j + 83] = ((short *)&y2i)[13]; + // RE 15 + stream0_out[j + 84] = ((short *)&y0r)[14]; + stream0_out[j + 85] = ((short *)&y1r)[14]; + stream0_out[j + 86] = ((short *)&y2r)[14]; + 
+  stream0_out[j + 87] = ((short *)&y0i)[14];
+  stream0_out[j + 88] = ((short *)&y1i)[14];
+  stream0_out[j + 89] = ((short *)&y2i)[14];
+  // RE 16
+  stream0_out[j + 90] = ((short *)&y0r)[15];
+  stream0_out[j + 91] = ((short *)&y1r)[15];
+  stream0_out[j + 92] = ((short *)&y2r)[15];
+  stream0_out[j + 93] = ((short *)&y0i)[15];
+  stream0_out[j + 94] = ((short *)&y1i)[15];
+  stream0_out[j + 95] = ((short *)&y2i)[15];
+
+#elif defined(__arm__)
+
+#endif
+
+  }
+
+#if defined(__x86_64__) || defined(__i386__)
+  _mm_empty();
+  _m_empty();
+#endif
+}
diff --git a/openair1/PHY/LTE_TRANSPORT/proto.h b/openair1/PHY/LTE_TRANSPORT/proto.h
index a5aa145e176febfd50ee06cd3eb0610bc747a36d..fe7991e343ab200cf96d8d5f7a34f66a29a8fba0 100644
--- a/openair1/PHY/LTE_TRANSPORT/proto.h
+++ b/openair1/PHY/LTE_TRANSPORT/proto.h
@@ -704,6 +704,22 @@ void qam64_qam16(short *stream0_in,
                  short *rho01,
                  int length);
 
+/** \brief This function computes the LLRs for ML (max-logsum approximation) dual-stream 64QAM/16QAM reception.
+    @param stream0_in Input from channel compensated (MR combined) stream 0
+    @param stream1_in Input from channel compensated (MR combined) stream 1
+    @param ch_mag Input from scaled channel magnitude square of h0'*g0
+    @param ch_mag_i Input from scaled channel magnitude square of h0'*g1
+    @param stream0_out Output from LLR unit for stream0
+    @param rho01 Cross-correlation between channels (MR combined)
+    @param length in complex channel outputs*/
+void qam64_qam16_avx2(short *stream0_in,
+                      short *stream1_in,
+                      short *ch_mag,
+                      short *ch_mag_i,
+                      short *stream0_out,
+                      short *rho01,
+                      int length);
+
 /** \brief This function perform LLR computation for dual-stream (64QAM/16QAM) transmission.
     @param frame_parms Frame descriptor structure
     @param rxdataF_comp Compensated channel output